Netdev List

Netdev List
 help / color / mirror / Atom feed

* Re: [PATCH 08/23] arm64: topology: Use RCU to protect access to HK_TYPE_TICK cpumask
From: Chen Ridong @ 2026-04-22  9:34 UTC (permalink / raw)
  To: Waiman Long, Tejun Heo, Johannes Weiner, Michal Koutný,
	Jonathan Corbet, Shuah Khan, Catalin Marinas, Will Deacon,
	K. Y. Srinivasan, Haiyang Zhang, Wei Liu, Dexuan Cui, Long Li,
	Guenter Roeck, Frederic Weisbecker, Paul E. McKenney,
	Neeraj Upadhyay, Joel Fernandes, Josh Triplett, Boqun Feng,
	Uladzislau Rezki, Steven Rostedt, Mathieu Desnoyers,
	Lai Jiangshan, Zqiang, Anna-Maria Behnsen, Ingo Molnar,
	Thomas Gleixner, Peter Zijlstra, Juri Lelli, Vincent Guittot,
	Dietmar Eggemann, Ben Segall, Mel Gorman, Valentin Schneider,
	K Prateek Nayak, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman
  Cc: cgroups, linux-doc, linux-kernel, linux-arm-kernel, linux-hyperv,
	linux-hwmon, rcu, netdev, linux-kselftest, Costa Shulyupin,
	Qiliang Yuan
In-Reply-To: <20260421030351.281436-9-longman@redhat.com>



On 2026/4/21 11:03, Waiman Long wrote:
> As the HK_TYPE_TICK cpumask is going to be changeable at run time, we
> need to use RCU to protect access to the cpumask to prevent it from
> going away in the middle of the operation.
> 
> Signed-off-by: Waiman Long <longman@redhat.com>
> ---
>  arch/arm64/kernel/topology.c | 17 ++++++++++++++---
>  1 file changed, 14 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
> index b32f13358fbb..48f150801689 100644
> --- a/arch/arm64/kernel/topology.c
> +++ b/arch/arm64/kernel/topology.c
> @@ -173,6 +173,7 @@ void arch_cpu_idle_enter(void)
>  	if (!amu_fie_cpu_supported(cpu))
>  		return;
>  
> +	guard(rcu)();
>  	/* Kick in AMU update but only if one has not happened already */
>  	if (housekeeping_cpu(cpu, HK_TYPE_TICK) &&
>  	    time_is_before_jiffies(per_cpu(cpu_amu_samples.last_scale_update, cpu)))
> @@ -187,11 +188,16 @@ int arch_freq_get_on_cpu(int cpu)
>  	unsigned int start_cpu = cpu;
>  	unsigned long last_update;
>  	unsigned int freq = 0;
> +	bool hk_cpu;
>  	u64 scale;
>  
>  	if (!amu_fie_cpu_supported(cpu) || !arch_scale_freq_ref(cpu))
>  		return -EOPNOTSUPP;
>  
> +	scoped_guard(rcu) {
> +		hk_cpu = housekeeping_cpu(cpu, HK_TYPE_TICK);
> +	}
> +

Should this be moved inside the while loop, since cpu might be changed to ref_cpu?

>  	while (1) {
>  
>  		amu_sample = per_cpu_ptr(&cpu_amu_samples, cpu);
> @@ -204,16 +210,21 @@ int arch_freq_get_on_cpu(int cpu)
>  		 * (and thus freq scale), if available, for given policy: this boils
>  		 * down to identifying an active cpu within the same freq domain, if any.
>  		 */
> -		if (!housekeeping_cpu(cpu, HK_TYPE_TICK) ||
> +		if (!hk_cpu ||
>  		    time_is_before_jiffies(last_update + msecs_to_jiffies(AMU_SAMPLE_EXP_MS))) {
>  			struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
> +			bool hk_intersects;
>  			int ref_cpu;
>  
>  			if (!policy)
>  				return -EINVAL;
>  
> -			if (!cpumask_intersects(policy->related_cpus,
> -						housekeeping_cpumask(HK_TYPE_TICK))) {
> +			scoped_guard(rcu) {
> +				hk_intersects = cpumask_intersects(policy->related_cpus,
> +							housekeeping_cpumask(HK_TYPE_TICK));
> +			}
> +
> +			if (!hk_intersects) {
>  				cpufreq_cpu_put(policy);
>  				return -EOPNOTSUPP;
>  			}

-- 
Best regards,
Ridong


^ permalink raw reply

* [PATCH] net: atheros: atl1e: use atomic functions with memory barriers for next_to_clean
From: Gui-Dong Han @ 2026-04-22  9:37 UTC (permalink / raw)
  To: Chris Snook
  Cc: Andrew Lunn, David S . Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, netdev, linux-kernel, baijiaju1990, Gui-Dong Han

next_to_clean synchronizes Tx ring cleanup in atl1e_clean_tx_irq()
with slot reuse in atl1e_tpd_avail().

atomic_set() and atomic_read() do not provide ordering. Without memory
barriers, out-of-order execution can lead to concurrency bugs on weak
memory architectures like ARM. For example, this can let the transmit
path reuse a slot before tx_buffer->skb = NULL is visible and trigger
the BUG_ON() in atl1e_tx_map(). This is a constructed scenario, and
there might be other undiscovered, potentially more harmful bugs caused
by this lack of ordering.

Use atomic_set_release() and atomic_read_acquire() for next_to_clean.

Fixes: a6a5325239c2 ("atl1e: Atheros L1E Gigabit Ethernet driver")
Signed-off-by: Gui-Dong Han <hanguidong02@gmail.com>
---
Found by auditing atomic operations used for synchronization.
A similar fix can be found in 6df8e84aa6b5.

Do not change atl1e_init_ring_ptrs(). Its atomic_set() runs during
bring-up before NAPI and interrupts are enabled, so it is not a runtime
publication point between Tx cleanup and Tx submission.

In my opinion, implementing ad-hoc lockless algorithms directly within
individual drivers is highly error-prone. To avoid these subtle memory
ordering and barrier bugs, drivers should rely on established, well-tested
kernel libraries like kfifo to handle this type of concurrency.
---
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 40290028580b..4ac8d4786820 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1232,7 +1232,7 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter)
 	struct atl1e_tx_ring *tx_ring = &adapter->tx_ring;
 	struct atl1e_tx_buffer *tx_buffer = NULL;
 	u16 hw_next_to_clean = AT_READ_REGW(&adapter->hw, REG_TPD_CONS_IDX);
-	u16 next_to_clean = atomic_read(&tx_ring->next_to_clean);
+	u16 next_to_clean = atomic_read_acquire(&tx_ring->next_to_clean);
 
 	while (next_to_clean != hw_next_to_clean) {
 		tx_buffer = &tx_ring->tx_buffer[next_to_clean];
@@ -1259,7 +1259,7 @@ static bool atl1e_clean_tx_irq(struct atl1e_adapter *adapter)
 			next_to_clean = 0;
 	}
 
-	atomic_set(&tx_ring->next_to_clean, next_to_clean);
+	atomic_set_release(&tx_ring->next_to_clean, next_to_clean);
 
 	if (netif_queue_stopped(adapter->netdev) &&
 			netif_carrier_ok(adapter->netdev)) {
@@ -1562,7 +1562,7 @@ static inline u16 atl1e_tpd_avail(struct atl1e_adapter *adapter)
 	u16 next_to_use = 0;
 	u16 next_to_clean = 0;
 
-	next_to_clean = atomic_read(&tx_ring->next_to_clean);
+	next_to_clean = atomic_read_acquire(&tx_ring->next_to_clean);
 	next_to_use   = tx_ring->next_to_use;
 
 	return (u16)(next_to_clean > next_to_use) ?
-- 
2.43.0


^ permalink raw reply related

* Re: [PATCH net] hv_sock: Return -EIO for malformed/short packets
From: Stefano Garzarella @ 2026-04-22  9:40 UTC (permalink / raw)
  To: Dexuan Cui
  Cc: kys, haiyangz, wei.liu, longli, davem, edumazet, kuba, pabeni,
	horms, niuxuewei.nxw, linux-hyperv, virtualization, netdev,
	linux-kernel, stable
In-Reply-To: <20260421174931.1152238-1-decui@microsoft.com>

On Tue, Apr 21, 2026 at 10:49:31AM -0700, Dexuan Cui wrote:
>Commit f63152958994 fixes a regression, however it fails to report an
>error for malformed/short packets -- normally we should never see such
>packets, but let's report an error for them just in case.
>
>Fixes: f63152958994 ("hv_sock: Report EOF instead of -EIO for FIN")
>Cc: stable@vger.kernel.org
>Signed-off-by: Dexuan Cui <decui@microsoft.com>
>---
>
>Commit f63152958994 is currently only in net.git's master branch.
>
> net/vmw_vsock/hyperv_transport.c | 29 +++++++++++++++++++----------
> 1 file changed, 19 insertions(+), 10 deletions(-)
>
>diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
>index 76e78c83fdbc..8faaa14bccda 100644
>--- a/net/vmw_vsock/hyperv_transport.c
>+++ b/net/vmw_vsock/hyperv_transport.c
>@@ -704,18 +704,27 @@ static s64 hvs_stream_has_data(struct vsock_sock *vsk)
> 		if (hvs->recv_desc) {
> 			/* Here hvs->recv_data_len is 0, so hvs->recv_desc must
> 			 * be NULL unless it points to the 0-byte-payload FIN
>-			 * packet: see hvs_update_recv_data().
>+			 * packet or a malformed/short packet: see
>+			 * hvs_update_recv_data().
> 			 *
>-			 * Here all the payload has been dequeued, but
>-			 * hvs_channel_readable_payload() still returns 1,
>-			 * because the VMBus ringbuffer's read_index is not
>-			 * updated for the FIN packet: hvs_stream_dequeue() ->
>-			 * hv_pkt_iter_next() updates the cached priv_read_index
>-			 * but has no opportunity to update the read_index in
>-			 * hv_pkt_iter_close() as hvs_stream_has_data() returns
>-			 * 0 for the FIN packet, so it won't get dequeued.
>+			 * If hvs->recv_desc points to the FIN packet, here all
>+			 * the payload has been dequeued and the peer_shutdown
>+			 * flag is set, but hvs_channel_readable_payload() still
>+			 * returns 1, because the VMBus ringbuffer's read_index
>+			 * is not updated for the FIN packet:
>+			 * hvs_stream_dequeue() -> hv_pkt_iter_next() updates
>+			 * the cached priv_read_index but has no opportunity to
>+			 * update the read_index in hv_pkt_iter_close() as
>+			 * hvs_stream_has_data() returns 0 for the FIN packet,
>+			 * so it won't get dequeued.
>+			 *
>+			 * In case hvs->recv_desc points to a malformed/short
>+			 * packet, return -EIO.
> 			 */
>-			return 0;
>+			if (hvs->vsk->peer_shutdown & SEND_SHUTDOWN)

We can access `vsk` directly, I mean `vsk->peer_shutdown`.

>+				return 0;
>+			else

nit: we usually avoid the `else` if the other branch returns early, and 
maybe have the error returned first, so it's more clear when reading the 
comment on top.  I mean something like this:

			if (!(vsk->peer_shutdown & SEND_SHUTDOWN))
				return -EIO;

			return 0;

BTW, not a strong opinion on that.

The rest, LGTM!

Thanks,
Stefano


^ permalink raw reply

* Re: [PATCH net v3 1/1] net: hsr: limit node table growth
From: Felix Maurer @ 2026-04-22  9:45 UTC (permalink / raw)
  To: Sebastian Andrzej Siewior
  Cc: Ren Wei, netdev, davem, edumazet, kuba, pabeni, horms, kees,
	kexinsun, luka.gejak, Arvid.Brodin, m-karicheri2, yuantan098,
	yifanwucs, tomapufckgml, bird, xuyuqiabc, royenheart
In-Reply-To: <20260422085242.3TkVbXc2@linutronix.de>

On Wed, Apr 22, 2026 at 10:52:42AM +0200, Sebastian Andrzej Siewior wrote:
> On 2026-04-22 10:31:39 [+0200], Felix Maurer wrote:
> > On Tue, Apr 21, 2026 at 10:50:01PM +0800, Ren Wei wrote:
> > > diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
> > > index d09875b33588..8a5a2a54a81f 100644
> > > --- a/net/hsr/hsr_framereg.c
> > > +++ b/net/hsr/hsr_framereg.c
> > > @@ -189,6 +195,7 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
> > >  				     enum hsr_port_type rx_port)
> > >  {
> > >  	struct hsr_node *new_node, *node = NULL;
> > > +	unsigned int node_count = 0;
> > >  	unsigned long now;
> > >  	size_t block_sz;
> > >  	int i;
> > > @@ -226,20 +233,31 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
> > >  	spin_lock_bh(&hsr->list_lock);
> > >  	list_for_each_entry_rcu(node, node_db, mac_list,
> > >  				lockdep_is_held(&hsr->list_lock)) {
> > > +		node_count++;
> >
> > I'm not sure if this on-the-fly node counting is the best solution here.
> > My concern is that it comes quite late in the process, i.e., after we
> > already allocated a bunch of memory, etc. As we are discussing a
> > scenario where a lot of entries are created, maybe we shouldn't even
> > allocate a new_node if the table is already full? For example by storing
> > the node_count in hsr_priv and checking it early in the function?
>
> The node is allocated upfront. Then it iterates here and we only end up
> counting through the full list if there is no match. This is under a
> lock so "many clients" are serialized. If we allocate the node later
> then we need to do it under the lock.
>
> I don't think the node count exceeds 100 in production. So having a
> counter which is incremented while adding to the list and decremented
> while removing items from the list would optimize the "worst case". So
> instead of traversing a list with 1000 entries we would just give up.

The counter is what I had in mind. I agree that allocating under the
lock isn't what we want.

I'd argue counting through the whole list is the normal case.
hsr_add_node() is only called after the node table has been searched
already (without the lock). Here we go through the whole list again
under the lock to prevent TOCTOU-type situations.

I agree that, overall, it would be optimizing the worst case, but I
think it may be worth it to prevent the memory allocations and walking
the whole list. But I'd go along with the (current) on-the-fly counting
as well.

Thanks,
   Felix


^ permalink raw reply

* [RFC PATCH net-next 0/5] net: wangxun: timeout and error
From: Jiawen Wu @ 2026-04-22  9:56 UTC (permalink / raw)
  To: netdev
  Cc: Mengyuan Lou, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran, Russell King,
	Simon Horman, Kees Cook, Larysa Zaremba, Breno Leitao, Joe Damato,
	Jacob Keller, Fabio Baltieri, Jiawen Wu

This series is a split of the previous series:
https://lore.kernel.org/all/20260326021406.30444-1-jiawenwu@trustnetic.com

It is about adding the Tx timeout process and pci_error_handlers.
The changes from the last full patch set V6:
- Add 'else' handling in ngbe_do_reset().
- Acquire rtnl_lock() before checking netif_running() in
  wx_reset_subtask().
- Use test_and_clear_bit() instead of a separate test_bit() and
  clear_bit() to avoid losing another reset request.
- Change 'u64 tx_done_old' to 'u32' to avoid a data race between
  dev_watchdog and NAPI polling.
- Check the return value of ndo_open() in wx_io_resume().
- Drop pci_save_state().

Jiawen Wu (5):
  net: ngbe: implement libwx reset ops
  net: wangxun: add Tx timeout process
  net: wangxun: add reinit parameter to wx->do_reset callback
  net: wangxun: extract the close_suspend sequence
  net: wangxun: implement pci_error_handlers ops

 drivers/net/ethernet/wangxun/libwx/Makefile   |   2 +-
 drivers/net/ethernet/wangxun/libwx/wx_err.c   | 232 ++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_err.h   |  16 ++
 .../net/ethernet/wangxun/libwx/wx_ethtool.c   |   2 +-
 drivers/net/ethernet/wangxun/libwx/wx_hw.c    |  17 +-
 drivers/net/ethernet/wangxun/libwx/wx_lib.c   |  41 +++-
 drivers/net/ethernet/wangxun/libwx/wx_lib.h   |   1 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h  |  16 +-
 .../net/ethernet/wangxun/ngbe/ngbe_ethtool.c  |   1 -
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c |  68 ++++-
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h |   2 +
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   |  26 +-
 .../net/ethernet/wangxun/txgbe/txgbe_type.h   |   3 +-
 13 files changed, 398 insertions(+), 29 deletions(-)
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_err.c
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_err.h

-- 
2.51.0


^ permalink raw reply

* [RFC PATCH net-next 1/5] net: ngbe: implement libwx reset ops
From: Jiawen Wu @ 2026-04-22  9:56 UTC (permalink / raw)
  To: netdev
  Cc: Mengyuan Lou, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran, Russell King,
	Simon Horman, Kees Cook, Larysa Zaremba, Breno Leitao, Joe Damato,
	Jacob Keller, Fabio Baltieri, Jiawen Wu
In-Reply-To: <20260422095617.27080-1-jiawenwu@trustnetic.com>

Implement the wx->do_reset() callback so that the libwx library module
can call it.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 .../net/ethernet/wangxun/ngbe/ngbe_ethtool.c  |  1 -
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 37 ++++++++++++++++++-
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h |  1 +
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
index b2e191982803..1960f7154151 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c
@@ -59,7 +59,6 @@ static int ngbe_set_ringparam(struct net_device *netdev,
 	wx_set_ring(wx, new_tx_count, new_rx_count, temp_ring);
 	kvfree(temp_ring);
 
-	wx_configure(wx);
 	ngbe_up(wx);
 
 clear_reset:
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index d8e3827a8b1f..bd905e267575 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -133,6 +133,7 @@ static int ngbe_sw_init(struct wx *wx)
 
 	wx->mbx.size = WX_VXMAILBOX_SIZE;
 	wx->setup_tc = ngbe_setup_tc;
+	wx->do_reset = ngbe_do_reset;
 	set_bit(0, &wx->fwd_bitmask);
 
 	return 0;
@@ -422,7 +423,7 @@ void ngbe_down(struct wx *wx)
 	wx_clean_all_rx_rings(wx);
 }
 
-void ngbe_up(struct wx *wx)
+static void ngbe_up_complete(struct wx *wx)
 {
 	wx_configure_vectors(wx);
 
@@ -488,7 +489,7 @@ static int ngbe_open(struct net_device *netdev)
 
 	wx_ptp_init(wx);
 
-	ngbe_up(wx);
+	ngbe_up_complete(wx);
 
 	return 0;
 err_dis_phy:
@@ -501,6 +502,12 @@ static int ngbe_open(struct net_device *netdev)
 	return err;
 }
 
+void ngbe_up(struct wx *wx)
+{
+	wx_configure(wx);
+	ngbe_up_complete(wx);
+}
+
 /**
  * ngbe_close - Disables a network interface
  * @netdev: network interface device structure
@@ -588,6 +595,8 @@ int ngbe_setup_tc(struct net_device *dev, u8 tc)
 	 */
 	if (netif_running(dev))
 		ngbe_close(dev);
+	else
+		ngbe_reset(wx);
 
 	wx_clear_interrupt_scheme(wx);
 
@@ -604,6 +613,30 @@ int ngbe_setup_tc(struct net_device *dev, u8 tc)
 	return 0;
 }
 
+static void ngbe_reinit_locked(struct wx *wx)
+{
+	netif_trans_update(wx->netdev);
+
+	mutex_lock(&wx->reset_lock);
+	set_bit(WX_STATE_RESETTING, wx->state);
+
+	ngbe_down(wx);
+	ngbe_up(wx);
+
+	clear_bit(WX_STATE_RESETTING, wx->state);
+	mutex_unlock(&wx->reset_lock);
+}
+
+void ngbe_do_reset(struct net_device *netdev)
+{
+	struct wx *wx = netdev_priv(netdev);
+
+	if (netif_running(netdev))
+		ngbe_reinit_locked(wx);
+	else
+		ngbe_reset(wx);
+}
+
 static const struct net_device_ops ngbe_netdev_ops = {
 	.ndo_open               = ngbe_open,
 	.ndo_stop               = ngbe_close,
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index 7077a0da4c98..4f648f272c08 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -125,5 +125,6 @@ extern char ngbe_driver_name[];
 void ngbe_down(struct wx *wx);
 void ngbe_up(struct wx *wx);
 int ngbe_setup_tc(struct net_device *dev, u8 tc);
+void ngbe_do_reset(struct net_device *netdev);
 
 #endif /* _NGBE_TYPE_H_ */
-- 
2.51.0


^ permalink raw reply related

* [RFC PATCH net-next 5/5] net: wangxun: implement pci_error_handlers ops
From: Jiawen Wu @ 2026-04-22  9:56 UTC (permalink / raw)
  To: netdev
  Cc: Mengyuan Lou, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran, Russell King,
	Simon Horman, Kees Cook, Larysa Zaremba, Breno Leitao, Joe Damato,
	Jacob Keller, Fabio Baltieri, Jiawen Wu
In-Reply-To: <20260422095617.27080-1-jiawenwu@trustnetic.com>

Implement pci_error_handlers so that the AER driver can handle PCIe
errors.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ethernet/wangxun/libwx/wx_err.c   | 107 ++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_err.h   |   2 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h  |   1 +
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c |   9 +-
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   |   5 +-
 5 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
index e7c9dcb148b5..1aefae402c8e 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
@@ -3,11 +3,118 @@
 
 #include <linux/netdevice.h>
 #include <linux/pci.h>
+#include <linux/aer.h>
 
 #include "wx_type.h"
 #include "wx_lib.h"
 #include "wx_err.h"
 
+/**
+ * wx_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * Return: pci_ers_result_t.
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t wx_io_error_detected(struct pci_dev *pdev,
+					     pci_channel_state_t state)
+{
+	struct wx *wx = pci_get_drvdata(pdev);
+	struct net_device *netdev;
+
+	netdev = wx->netdev;
+	if (!netif_device_present(netdev))
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	rtnl_lock();
+	netif_device_detach(netdev);
+
+	if (netif_running(netdev))
+		wx->close_suspend(wx);
+
+	if (state == pci_channel_io_perm_failure) {
+		rtnl_unlock();
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
+		pci_disable_device(pdev);
+	rtnl_unlock();
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * wx_io_slot_reset - called after the pci bus has been reset.
+ * @pdev: Pointer to PCI device
+ *
+ * Return: pci_ers_result_t.
+ *
+ * Restart the card from scratch, as if from a cold-boot.
+ */
+static pci_ers_result_t wx_io_slot_reset(struct pci_dev *pdev)
+{
+	struct wx *wx = pci_get_drvdata(pdev);
+	pci_ers_result_t result;
+
+	if (pci_enable_device_mem(pdev)) {
+		wx_err(wx, "Cannot re-enable PCI device after reset.\n");
+		result = PCI_ERS_RESULT_DISCONNECT;
+	} else {
+		/* make all bar access done before reset. */
+		smp_mb__before_atomic();
+		clear_bit(WX_STATE_DISABLED, wx->state);
+		pci_set_master(pdev);
+		pci_restore_state(pdev);
+		pci_wake_from_d3(pdev, false);
+
+		wx->do_reset(wx->netdev, false);
+		result = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	pci_aer_clear_nonfatal_status(pdev);
+
+	return result;
+}
+
+/**
+ * wx_io_resume - called when traffic can start flowing again.
+ * @pdev: Pointer to PCI device
+ *
+ * This callback is called when the error recovery driver tells us that
+ * it's OK to resume normal operation.
+ */
+static void wx_io_resume(struct pci_dev *pdev)
+{
+	struct wx *wx = pci_get_drvdata(pdev);
+	struct net_device *netdev;
+	int err;
+
+	netdev = wx->netdev;
+	rtnl_lock();
+	if (netif_running(netdev)) {
+		err = netdev->netdev_ops->ndo_open(netdev);
+		if (err) {
+			wx_err(wx, "Failed to open netdev after reset\n");
+			goto out;
+		}
+	}
+	netif_device_attach(netdev);
+out:
+	rtnl_unlock();
+}
+
+const struct pci_error_handlers wx_err_handler = {
+	.error_detected = wx_io_error_detected,
+	.slot_reset = wx_io_slot_reset,
+	.resume = wx_io_resume,
+};
+EXPORT_SYMBOL(wx_err_handler);
+
 static void wx_reset_subtask(struct wx *wx)
 {
 	if (!test_bit(WX_FLAG_NEED_PF_RESET, wx->flags))
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.h b/drivers/net/ethernet/wangxun/libwx/wx_err.h
index e317e6c8d928..8b1a7863b5b1 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_err.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_err.h
@@ -7,6 +7,8 @@
 #ifndef _WX_ERR_H_
 #define _WX_ERR_H_
 
+extern const struct pci_error_handlers wx_err_handler;
+
 void wx_handle_errors_subtask(struct wx *wx);
 void wx_tx_timeout(struct net_device *netdev, unsigned int txqueue);
 void wx_handle_tx_hang(struct wx_ring *tx_ring, unsigned int next);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 4b72835ddec1..81e12609d3fa 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1215,6 +1215,7 @@ enum wx_state {
 	WX_STATE_PTP_RUNNING,
 	WX_STATE_PTP_TX_IN_PROGRESS,
 	WX_STATE_SERVICE_SCHED,
+	WX_STATE_DISABLED,
 	WX_STATE_NBITS		/* must be last */
 };
 
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index bd6c0c9c51ba..a174605d1105 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -570,7 +570,8 @@ static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake)
 	*enable_wake = !!wufc;
 	wx_control_hw(wx, false);
 
-	pci_disable_device(pdev);
+	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
+		pci_disable_device(pdev);
 }
 
 static void ngbe_shutdown(struct pci_dev *pdev)
@@ -856,6 +857,7 @@ static int ngbe_probe(struct pci_dev *pdev,
 		goto err_register;
 
 	pci_set_drvdata(pdev, wx);
+	pci_save_state(pdev);
 
 	return 0;
 
@@ -907,7 +909,8 @@ static void ngbe_remove(struct pci_dev *pdev)
 	kfree(wx->mac_table);
 	wx_clear_interrupt_scheme(wx);
 
-	pci_disable_device(pdev);
+	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
+		pci_disable_device(pdev);
 }
 
 static int ngbe_suspend(struct pci_dev *pdev, pm_message_t state)
@@ -934,6 +937,7 @@ static int ngbe_resume(struct pci_dev *pdev)
 		wx_err(wx, "Cannot enable PCI device from suspend\n");
 		return err;
 	}
+	clear_bit(WX_STATE_DISABLED, wx->state);
 	pci_set_master(pdev);
 	device_wakeup_disable(&pdev->dev);
 
@@ -958,6 +962,7 @@ static struct pci_driver ngbe_driver = {
 	.resume   = ngbe_resume,
 	.shutdown = ngbe_shutdown,
 	.sriov_configure = wx_pci_sriov_configure,
+	.err_handler = &wx_err_handler,
 };
 
 module_pci_driver(ngbe_driver);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 20969dd7d47a..56e8482c1896 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -899,6 +899,7 @@ static int txgbe_probe(struct pci_dev *pdev,
 		goto err_remove_phy;
 
 	pci_set_drvdata(pdev, wx);
+	pci_save_state(pdev);
 
 	netif_tx_stop_all_queues(netdev);
 
@@ -969,7 +970,8 @@ static void txgbe_remove(struct pci_dev *pdev)
 	kfree(wx->mac_table);
 	wx_clear_interrupt_scheme(wx);
 
-	pci_disable_device(pdev);
+	if (!test_and_set_bit(WX_STATE_DISABLED, wx->state))
+		pci_disable_device(pdev);
 }
 
 static struct pci_driver txgbe_driver = {
@@ -979,6 +981,7 @@ static struct pci_driver txgbe_driver = {
 	.remove   = txgbe_remove,
 	.shutdown = txgbe_shutdown,
 	.sriov_configure = wx_pci_sriov_configure,
+	.err_handler = &wx_err_handler,
 };
 
 module_pci_driver(txgbe_driver);
-- 
2.51.0


^ permalink raw reply related

* [RFC PATCH net-next 3/5] net: wangxun: add reinit parameter to wx->do_reset callback
From: Jiawen Wu @ 2026-04-22  9:56 UTC (permalink / raw)
  To: netdev
  Cc: Mengyuan Lou, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran, Russell King,
	Simon Horman, Kees Cook, Larysa Zaremba, Breno Leitao, Joe Damato,
	Jacob Keller, Fabio Baltieri, Jiawen Wu
In-Reply-To: <20260422095617.27080-1-jiawenwu@trustnetic.com>

To implement a simple hardware reset without tearing down the network
interface state, introduce a boolean 'reinit' parameter to the
wx->do_reset callback.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ethernet/wangxun/libwx/wx_err.c     | 2 +-
 drivers/net/ethernet/wangxun/libwx/wx_ethtool.c | 2 +-
 drivers/net/ethernet/wangxun/libwx/wx_lib.c     | 4 ++--
 drivers/net/ethernet/wangxun/libwx/wx_type.h    | 2 +-
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c   | 4 ++--
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h   | 2 +-
 drivers/net/ethernet/wangxun/txgbe/txgbe_main.c | 4 ++--
 drivers/net/ethernet/wangxun/txgbe/txgbe_type.h | 2 +-
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
index 42e00f0bd8da..e7c9dcb148b5 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_err.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
@@ -23,7 +23,7 @@ static void wx_reset_subtask(struct wx *wx)
 
 	if (test_and_clear_bit(WX_FLAG_NEED_PF_RESET, wx->flags)) {
 		if (wx->do_reset)
-			wx->do_reset(wx->netdev);
+			wx->do_reset(wx->netdev, true);
 	}
 
 	rtnl_unlock();
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
index 5df971aca9e3..d1356ff5d69b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c
@@ -395,7 +395,7 @@ static void wx_update_rsc(struct wx *wx)
 
 	/* reset the device to apply the new RSC setting */
 	if (need_reset && wx->do_reset)
-		wx->do_reset(netdev);
+		wx->do_reset(netdev, true);
 }
 
 int wx_set_coalesce(struct net_device *netdev,
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 9e6167b43f75..3216dee778be 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -3146,7 +3146,7 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features)
 	netdev->features = features;
 
 	if (changed & NETIF_F_HW_VLAN_CTAG_RX && wx->do_reset)
-		wx->do_reset(netdev);
+		wx->do_reset(netdev, true);
 	else if (changed & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER))
 		wx_set_rx_mode(netdev);
 
@@ -3196,7 +3196,7 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features)
 
 out:
 	if (need_reset && wx->do_reset)
-		wx->do_reset(netdev);
+		wx->do_reset(netdev, true);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index f65c2d7bae39..671ac0a19dee 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1402,7 +1402,7 @@ struct wx {
 	void (*atr)(struct wx_ring *ring, struct wx_tx_buffer *first, u8 ptype);
 	void (*configure_fdir)(struct wx *wx);
 	int (*setup_tc)(struct net_device *netdev, u8 tc);
-	void (*do_reset)(struct net_device *netdev);
+	void (*do_reset)(struct net_device *netdev, bool reinit);
 	int (*ptp_setup_sdp)(struct wx *wx);
 	void (*set_num_queues)(struct wx *wx);
 
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index e9561996b970..ec14dd47cd42 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -629,11 +629,11 @@ static void ngbe_reinit_locked(struct wx *wx)
 	mutex_unlock(&wx->reset_lock);
 }
 
-void ngbe_do_reset(struct net_device *netdev)
+void ngbe_do_reset(struct net_device *netdev, bool reinit)
 {
 	struct wx *wx = netdev_priv(netdev);
 
-	if (netif_running(netdev))
+	if (netif_running(netdev) && reinit)
 		ngbe_reinit_locked(wx);
 	else
 		ngbe_reset(wx);
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index 4f648f272c08..c9233dc7ae50 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -125,6 +125,6 @@ extern char ngbe_driver_name[];
 void ngbe_down(struct wx *wx);
 void ngbe_up(struct wx *wx);
 int ngbe_setup_tc(struct net_device *dev, u8 tc);
-void ngbe_do_reset(struct net_device *netdev);
+void ngbe_do_reset(struct net_device *netdev, bool reinit);
 
 #endif /* _NGBE_TYPE_H_ */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index b13c48507374..f6050775af71 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -613,11 +613,11 @@ static void txgbe_reinit_locked(struct wx *wx)
 	mutex_unlock(&wx->reset_lock);
 }
 
-void txgbe_do_reset(struct net_device *netdev)
+void txgbe_do_reset(struct net_device *netdev, bool reinit)
 {
 	struct wx *wx = netdev_priv(netdev);
 
-	if (netif_running(netdev))
+	if (netif_running(netdev) && reinit)
 		txgbe_reinit_locked(wx);
 	else
 		txgbe_reset(wx);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 6b05f32b4a01..1e373f7fd9b5 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -313,7 +313,7 @@ extern char txgbe_driver_name[];
 void txgbe_down(struct wx *wx);
 void txgbe_up(struct wx *wx);
 int txgbe_setup_tc(struct net_device *dev, u8 tc);
-void txgbe_do_reset(struct net_device *netdev);
+void txgbe_do_reset(struct net_device *netdev, bool reinit);
 
 #define TXGBE_LINK_SPEED_UNKNOWN        0
 #define TXGBE_LINK_SPEED_10GB_FULL      4
-- 
2.51.0


^ permalink raw reply related

* [RFC PATCH net-next 4/5] net: wangxun: extract the close_suspend sequence
From: Jiawen Wu @ 2026-04-22  9:56 UTC (permalink / raw)
  To: netdev
  Cc: Mengyuan Lou, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran, Russell King,
	Simon Horman, Kees Cook, Larysa Zaremba, Breno Leitao, Joe Damato,
	Jacob Keller, Fabio Baltieri, Jiawen Wu
In-Reply-To: <20260422095617.27080-1-jiawenwu@trustnetic.com>

Refactor the .ndo_close implementation by extracting the necessary
hardware shutdown sequence into a dedicated close_suspend function.

This prepares for the later implementation of a PCIe error callback
function in libwx.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ethernet/wangxun/libwx/wx_type.h   |  1 +
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c  | 18 +++++++++++++-----
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h  |  1 +
 .../net/ethernet/wangxun/txgbe/txgbe_main.c    | 13 +++++++------
 .../net/ethernet/wangxun/txgbe/txgbe_type.h    |  1 +
 5 files changed, 23 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 671ac0a19dee..4b72835ddec1 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1403,6 +1403,7 @@ struct wx {
 	void (*configure_fdir)(struct wx *wx);
 	int (*setup_tc)(struct net_device *netdev, u8 tc);
 	void (*do_reset)(struct net_device *netdev, bool reinit);
+	void (*close_suspend)(struct wx *wx);
 	int (*ptp_setup_sdp)(struct wx *wx);
 	void (*set_num_queues)(struct wx *wx);
 
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index ec14dd47cd42..bd6c0c9c51ba 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -135,6 +135,7 @@ static int ngbe_sw_init(struct wx *wx)
 	wx->mbx.size = WX_VXMAILBOX_SIZE;
 	wx->setup_tc = ngbe_setup_tc;
 	wx->do_reset = ngbe_do_reset;
+	wx->close_suspend = ngbe_close_suspend;
 	set_bit(0, &wx->fwd_bitmask);
 
 	return 0;
@@ -510,6 +511,16 @@ void ngbe_up(struct wx *wx)
 	ngbe_up_complete(wx);
 }
 
+void ngbe_close_suspend(struct wx *wx)
+{
+	wx_ptp_suspend(wx);
+	ngbe_down(wx);
+	wx_free_irq(wx);
+	wx_free_isb_resources(wx);
+	wx_free_resources(wx);
+	phylink_disconnect_phy(wx->phylink);
+}
+
 /**
  * ngbe_close - Disables a network interface
  * @netdev: network interface device structure
@@ -526,11 +537,8 @@ static int ngbe_close(struct net_device *netdev)
 	struct wx *wx = netdev_priv(netdev);
 
 	wx_ptp_stop(wx);
-	ngbe_down(wx);
-	wx_free_irq(wx);
-	wx_free_isb_resources(wx);
-	wx_free_resources(wx);
-	phylink_disconnect_phy(wx->phylink);
+	if (netif_device_present(netdev))
+		ngbe_close_suspend(wx);
 	wx_control_hw(wx, false);
 
 	return 0;
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index c9233dc7ae50..eb5c92edae06 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -126,5 +126,6 @@ void ngbe_down(struct wx *wx);
 void ngbe_up(struct wx *wx);
 int ngbe_setup_tc(struct net_device *dev, u8 tc);
 void ngbe_do_reset(struct net_device *netdev, bool reinit);
+void ngbe_close_suspend(struct wx *wx);
 
 #endif /* _NGBE_TYPE_H_ */
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index f6050775af71..20969dd7d47a 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -415,6 +415,7 @@ static int txgbe_sw_init(struct wx *wx)
 
 	wx->setup_tc = txgbe_setup_tc;
 	wx->do_reset = txgbe_do_reset;
+	wx->close_suspend = txgbe_close_suspend;
 	set_bit(0, &wx->fwd_bitmask);
 
 	switch (wx->mac.type) {
@@ -503,10 +504,12 @@ static int txgbe_open(struct net_device *netdev)
  * This function should contain the necessary work common to both suspending
  * and closing of the device.
  */
-static void txgbe_close_suspend(struct wx *wx)
+void txgbe_close_suspend(struct wx *wx)
 {
 	wx_ptp_suspend(wx);
-	txgbe_disable_device(wx);
+	txgbe_down(wx);
+	wx_free_irq(wx);
+	txgbe_free_misc_irq(wx->priv);
 	wx_free_resources(wx);
 }
 
@@ -526,10 +529,8 @@ static int txgbe_close(struct net_device *netdev)
 	struct wx *wx = netdev_priv(netdev);
 
 	wx_ptp_stop(wx);
-	txgbe_down(wx);
-	wx_free_irq(wx);
-	txgbe_free_misc_irq(wx->priv);
-	wx_free_resources(wx);
+	if (netif_device_present(netdev))
+		txgbe_close_suspend(wx);
 	txgbe_fdir_filter_exit(wx);
 	wx_control_hw(wx, false);
 
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index 1e373f7fd9b5..cd50ff1ef2ed 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -314,6 +314,7 @@ void txgbe_down(struct wx *wx);
 void txgbe_up(struct wx *wx);
 int txgbe_setup_tc(struct net_device *dev, u8 tc);
 void txgbe_do_reset(struct net_device *netdev, bool reinit);
+void txgbe_close_suspend(struct wx *wx);
 
 #define TXGBE_LINK_SPEED_UNKNOWN        0
 #define TXGBE_LINK_SPEED_10GB_FULL      4
-- 
2.51.0


^ permalink raw reply related

* [RFC PATCH net-next 2/5] net: wangxun: add Tx timeout process
From: Jiawen Wu @ 2026-04-22  9:56 UTC (permalink / raw)
  To: netdev
  Cc: Mengyuan Lou, Andrew Lunn, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Richard Cochran, Russell King,
	Simon Horman, Kees Cook, Larysa Zaremba, Breno Leitao, Joe Damato,
	Jacob Keller, Fabio Baltieri, Jiawen Wu
In-Reply-To: <20260422095617.27080-1-jiawenwu@trustnetic.com>

Implement .ndo_tx_timeout to handle Tx side timeout events. When a Tx
timeout event occurs, it will trigger the driver into the reset process.

The WX_HANG_CHECK_ARMED bit is set to indicate a potential hang. It will
be cleared if a pause frame is received to remove false hang detection
due to 802.3 frames.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ethernet/wangxun/libwx/Makefile   |   2 +-
 drivers/net/ethernet/wangxun/libwx/wx_err.c   | 125 ++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_err.h   |  14 ++
 drivers/net/ethernet/wangxun/libwx/wx_hw.c    |  17 ++-
 drivers/net/ethernet/wangxun/libwx/wx_lib.c   |  37 ++++++
 drivers/net/ethernet/wangxun/libwx/wx_lib.h   |   1 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h  |  12 +-
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c |   4 +
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   |   4 +
 9 files changed, 211 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_err.c
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_err.h

diff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile
index a71b0ad77de3..c8724bb129aa 100644
--- a/drivers/net/ethernet/wangxun/libwx/Makefile
+++ b/drivers/net/ethernet/wangxun/libwx/Makefile
@@ -4,5 +4,5 @@
 
 obj-$(CONFIG_LIBWX) += libwx.o
 
-libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o
+libwx-objs := wx_hw.o wx_lib.o wx_ethtool.o wx_ptp.o wx_mbx.o wx_sriov.o wx_err.o
 libwx-objs += wx_vf.o wx_vf_lib.o wx_vf_common.o
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.c b/drivers/net/ethernet/wangxun/libwx/wx_err.c
new file mode 100644
index 000000000000..42e00f0bd8da
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_err.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2015 - 2026 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "wx_type.h"
+#include "wx_lib.h"
+#include "wx_err.h"
+
+static void wx_reset_subtask(struct wx *wx)
+{
+	if (!test_bit(WX_FLAG_NEED_PF_RESET, wx->flags))
+		return;
+
+	rtnl_lock();
+
+	if (!netif_running(wx->netdev) ||
+	    test_bit(WX_STATE_RESETTING, wx->state))
+		return;
+
+	wx_warn(wx, "Reset adapter.\n");
+
+	if (test_and_clear_bit(WX_FLAG_NEED_PF_RESET, wx->flags)) {
+		if (wx->do_reset)
+			wx->do_reset(wx->netdev);
+	}
+
+	rtnl_unlock();
+}
+
+/*
+ * wx_check_tx_hang_subtask - check for hung queues and dropped interrupts
+ * @wx - pointer to the device wx structure
+ *
+ * This function serves two purposes.  First it strobes the interrupt lines
+ * in order to make certain interrupts are occurring.  Secondly it sets the
+ * bits needed to check for TX hangs.  As a result we should immediately
+ * determine if a hang has occurred.
+ */
+static void wx_check_tx_hang_subtask(struct wx *wx)
+{
+	int i;
+
+	/* If we're down or resetting, just bail */
+	if (!netif_running(wx->netdev) ||
+	    test_bit(WX_STATE_RESETTING, wx->state))
+		return;
+
+	/* Force detection of hung controller */
+	if (netif_carrier_ok(wx->netdev)) {
+		for (i = 0; i < wx->num_tx_queues; i++)
+			set_bit(WX_TX_DETECT_HANG, wx->tx_ring[i]->state);
+	}
+}
+
+void wx_handle_errors_subtask(struct wx *wx)
+{
+	wx_reset_subtask(wx);
+	wx_check_tx_hang_subtask(wx);
+}
+EXPORT_SYMBOL(wx_handle_errors_subtask);
+
+static void wx_tx_timeout_reset(struct wx *wx)
+{
+	if (!netif_running(wx->netdev))
+		return;
+
+	set_bit(WX_FLAG_NEED_PF_RESET, wx->flags);
+	wx_warn(wx, "initiating reset due to tx timeout\n");
+	wx_service_event_schedule(wx);
+}
+
+void wx_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+{
+	struct wx *wx = netdev_priv(netdev);
+	u32 head, tail;
+	int i;
+
+	for (i = 0; i < wx->num_tx_queues; i++) {
+		struct wx_ring *tx_ring = wx->tx_ring[i];
+
+		if (test_bit(WX_TX_DETECT_HANG, tx_ring->state) &&
+		    wx_check_tx_hang(tx_ring))
+			wx_warn(wx, "Real tx hang detected on queue %d\n", i);
+
+		head = rd32(wx, WX_PX_TR_RP(tx_ring->reg_idx));
+		tail = rd32(wx, WX_PX_TR_WP(tx_ring->reg_idx));
+		wx_warn(wx,
+			"tx ring %d next_to_use is %d, next_to_clean is %d\n",
+			i, tx_ring->next_to_use,
+			tx_ring->next_to_clean);
+		wx_warn(wx, "tx ring %d hw rp is 0x%x, wp is 0x%x\n",
+			i, head, tail);
+	}
+
+	wx_tx_timeout_reset(wx);
+}
+EXPORT_SYMBOL(wx_tx_timeout);
+
+void wx_handle_tx_hang(struct wx_ring *tx_ring, unsigned int next)
+{
+	struct wx *wx = netdev_priv(tx_ring->netdev);
+
+	wx_warn(wx, "Detected Tx Unit Hang\n"
+		"  Tx Queue             <%d>\n"
+		"  TDH, TDT             <%x>, <%x>\n"
+		"  next_to_use          <%x>\n"
+		"  next_to_clean        <%x>\n"
+		"tx_buffer_info[next_to_clean]\n"
+		"  time_stamp           <%lx>\n"
+		"  jiffies              <%lx>\n",
+		tx_ring->queue_index,
+		rd32(wx, WX_PX_TR_RP(tx_ring->reg_idx)),
+		rd32(wx, WX_PX_TR_WP(tx_ring->reg_idx)),
+		tx_ring->next_to_use, next,
+		tx_ring->tx_buffer_info[next].time_stamp, jiffies);
+
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	wx_warn(wx, "tx hang detected on queue %d, resetting adapter\n",
+		tx_ring->queue_index);
+
+	wx_tx_timeout_reset(wx);
+}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_err.h b/drivers/net/ethernet/wangxun/libwx/wx_err.h
new file mode 100644
index 000000000000..e317e6c8d928
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_err.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2015 - 2026 Beijing WangXun Technology Co., Ltd.
+ */
+
+#ifndef _WX_ERR_H_
+#define _WX_ERR_H_
+
+void wx_handle_errors_subtask(struct wx *wx);
+void wx_tx_timeout(struct net_device *netdev, unsigned int txqueue);
+void wx_handle_tx_hang(struct wx_ring *tx_ring, unsigned int next);
+
+#endif /* _WX_ERR_H_ */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index d3772d01e00b..401dc7eb1137 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1932,6 +1932,7 @@ static void wx_configure_tx_ring(struct wx *wx,
 	else
 		ring->atr_sample_rate = 0;
 
+	bitmap_zero(ring->state, WX_RING_STATE_NBITS);
 	/* reinitialize tx_buffer_info */
 	memset(ring->tx_buffer_info, 0,
 	       sizeof(struct wx_tx_buffer) * ring->count);
@@ -2847,16 +2848,26 @@ EXPORT_SYMBOL(wx_fc_enable);
 static void wx_update_xoff_rx_lfc(struct wx *wx)
 {
 	struct wx_hw_stats *hwstats = &wx->stats;
+	u64 data;
+	int i;
 
 	if (wx->fc.mode != wx_fc_full &&
 	    wx->fc.mode != wx_fc_rx_pause)
 		return;
 
 	if (wx->mac.type >= wx_mac_aml)
-		hwstats->lxoffrxc += rd32_wrap(wx, WX_MAC_LXOFFRXC_AML,
-					       &wx->last_stats.lxoffrxc);
+		data = rd32_wrap(wx, WX_MAC_LXOFFRXC_AML,
+				 &wx->last_stats.lxoffrxc);
 	else
-		hwstats->lxoffrxc += rd64(wx, WX_MAC_LXOFFRXC);
+		data = rd64(wx, WX_MAC_LXOFFRXC);
+	hwstats->lxoffrxc += data;
+
+	/* refill credits (no tx hang) if we received xoff */
+	if (!data)
+		return;
+
+	for (i = 0; i < wx->num_tx_queues; i++)
+		clear_bit(WX_HANG_CHECK_ARMED, wx->tx_ring[i]->state);
 }
 
 /**
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 746623fa59b4..9e6167b43f75 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -14,6 +14,7 @@
 
 #include "wx_type.h"
 #include "wx_lib.h"
+#include "wx_err.h"
 #include "wx_ptp.h"
 #include "wx_hw.h"
 #include "wx_vf_lib.h"
@@ -742,6 +743,36 @@ static struct netdev_queue *wx_txring_txq(const struct wx_ring *ring)
 	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
 }
 
+static u32 wx_get_tx_pending(struct wx_ring *ring)
+{
+	unsigned int head, tail;
+
+	head = ring->next_to_clean;
+	tail = ring->next_to_use;
+
+	return ((head <= tail) ? tail : tail + ring->count) - head;
+}
+
+bool wx_check_tx_hang(struct wx_ring *ring)
+{
+	u32 tx_done_old = ring->tx_stats.tx_done_old;
+	u32 tx_pending = wx_get_tx_pending(ring);
+	u32 tx_done = ring->stats.packets;
+
+	clear_bit(WX_TX_DETECT_HANG, ring->state);
+
+	if (tx_done_old == tx_done && tx_pending)
+		/* make sure it is true for two checks in a row */
+		return test_and_set_bit(WX_HANG_CHECK_ARMED, ring->state);
+
+	/* update completed stats and continue */
+	ring->tx_stats.tx_done_old = tx_done;
+	/* reset the countdown */
+	clear_bit(WX_HANG_CHECK_ARMED, ring->state);
+
+	return false;
+}
+
 /**
  * wx_clean_tx_irq - Reclaim resources after transmit completes
  * @q_vector: structure containing interrupt and ring information
@@ -866,6 +897,12 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
 	netdev_tx_completed_queue(wx_txring_txq(tx_ring),
 				  total_packets, total_bytes);
 
+	if (test_bit(WX_TX_DETECT_HANG, tx_ring->state) &&
+	    wx_check_tx_hang(tx_ring)) {
+		wx_handle_tx_hang(tx_ring, i);
+		return true;
+	}
+
 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
 		     (wx_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
index aed6ea8cf0d6..e373cd7f05d3 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
@@ -10,6 +10,7 @@
 struct wx_dec_ptype wx_decode_ptype(const u8 ptype);
 void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count);
 u16 wx_desc_unused(struct wx_ring *ring);
+bool wx_check_tx_hang(struct wx_ring *ring);
 netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
 			  struct net_device *netdev);
 void wx_napi_enable_all(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 0da5565ee4ff..f65c2d7bae39 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -1039,6 +1039,7 @@ struct wx_queue_stats {
 struct wx_tx_queue_stats {
 	u64 restart_queue;
 	u64 tx_busy;
+	u32 tx_done_old;
 };
 
 struct wx_rx_queue_stats {
@@ -1054,6 +1055,12 @@ struct wx_rx_queue_stats {
 #define wx_for_each_ring(posm, headm) \
 	for (posm = (headm).ring; posm; posm = posm->next)
 
+enum wx_ring_state {
+	WX_TX_DETECT_HANG,
+	WX_HANG_CHECK_ARMED,
+	WX_RING_STATE_NBITS
+};
+
 struct wx_ring_container {
 	struct wx_ring *ring;           /* pointer to linked list of rings */
 	unsigned int total_bytes;       /* total bytes processed this int */
@@ -1073,6 +1080,7 @@ struct wx_ring {
 		struct wx_tx_buffer *tx_buffer_info;
 		struct wx_rx_buffer *rx_buffer_info;
 	};
+	DECLARE_BITMAP(state, WX_RING_STATE_NBITS);
 	u8 __iomem *tail;
 	dma_addr_t dma;                 /* phys. address of descriptor ring */
 	dma_addr_t headwb_dma;
@@ -1273,6 +1281,7 @@ enum wx_pf_flags {
 	WX_FLAG_NEED_DO_RESET,
 	WX_FLAG_RX_MERGE_ENABLED,
 	WX_FLAG_TXHEAD_WB_ENABLED,
+	WX_FLAG_NEED_PF_RESET,
 	WX_PF_FLAGS_NBITS               /* must be last */
 };
 
@@ -1503,7 +1512,8 @@ rd32_wrap(struct wx *wx, u32 reg, u32 *last)
 
 #define wx_err(wx, fmt, arg...) \
 	dev_err(&(wx)->pdev->dev, fmt, ##arg)
-
+#define wx_warn(wx, fmt, arg...) \
+	dev_warn(&(wx)->pdev->dev, fmt, ##arg)
 #define wx_dbg(wx, fmt, arg...) \
 	dev_dbg(&(wx)->pdev->dev, fmt, ##arg)
 
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index bd905e267575..e9561996b970 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -14,6 +14,7 @@
 #include "../libwx/wx_type.h"
 #include "../libwx/wx_hw.h"
 #include "../libwx/wx_lib.h"
+#include "../libwx/wx_err.h"
 #include "../libwx/wx_ptp.h"
 #include "../libwx/wx_mbx.h"
 #include "../libwx/wx_sriov.h"
@@ -147,6 +148,7 @@ static void ngbe_service_task(struct work_struct *work)
 {
 	struct wx *wx = container_of(work, struct wx, service_task);
 
+	wx_handle_errors_subtask(wx);
 	wx_update_stats(wx);
 
 	wx_service_event_complete(wx);
@@ -642,6 +644,7 @@ static const struct net_device_ops ngbe_netdev_ops = {
 	.ndo_stop               = ngbe_close,
 	.ndo_change_mtu         = wx_change_mtu,
 	.ndo_start_xmit         = wx_xmit_frame,
+	.ndo_tx_timeout         = wx_tx_timeout,
 	.ndo_set_rx_mode        = wx_set_rx_mode,
 	.ndo_set_features       = wx_set_features,
 	.ndo_fix_features       = wx_fix_features,
@@ -731,6 +734,7 @@ static int ngbe_probe(struct pci_dev *pdev,
 	wx->driver_name = ngbe_driver_name;
 	ngbe_set_ethtool_ops(netdev);
 	netdev->netdev_ops = &ngbe_netdev_ops;
+	netdev->watchdog_timeo = 5 * HZ;
 
 	netdev->features = NETIF_F_SG | NETIF_F_IP_CSUM |
 			   NETIF_F_TSO | NETIF_F_TSO6 |
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index ec32a5f422f2..b13c48507374 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -14,6 +14,7 @@
 
 #include "../libwx/wx_type.h"
 #include "../libwx/wx_lib.h"
+#include "../libwx/wx_err.h"
 #include "../libwx/wx_ptp.h"
 #include "../libwx/wx_hw.h"
 #include "../libwx/wx_mbx.h"
@@ -128,6 +129,7 @@ static void txgbe_service_task(struct work_struct *work)
 {
 	struct wx *wx = container_of(work, struct wx, service_task);
 
+	wx_handle_errors_subtask(wx);
 	txgbe_module_detection_subtask(wx);
 	txgbe_link_config_subtask(wx);
 	wx_update_stats(wx);
@@ -659,6 +661,7 @@ static const struct net_device_ops txgbe_netdev_ops = {
 	.ndo_stop               = txgbe_close,
 	.ndo_change_mtu         = wx_change_mtu,
 	.ndo_start_xmit         = wx_xmit_frame,
+	.ndo_tx_timeout         = wx_tx_timeout,
 	.ndo_set_rx_mode        = wx_set_rx_mode,
 	.ndo_set_features       = wx_set_features,
 	.ndo_fix_features       = wx_fix_features,
@@ -750,6 +753,7 @@ static int txgbe_probe(struct pci_dev *pdev,
 	wx->driver_name = txgbe_driver_name;
 	txgbe_set_ethtool_ops(netdev);
 	netdev->netdev_ops = &txgbe_netdev_ops;
+	netdev->watchdog_timeo = 5 * HZ;
 	netdev->udp_tunnel_nic_info = &txgbe_udp_tunnels;
 
 	/* setup the private structure */
-- 
2.51.0


^ permalink raw reply related

* [PATCH] NFC: trf7970a: Ignore antenna noise when checking for RF field
From: Paul Geurts @ 2026-04-22 10:09 UTC (permalink / raw)
  To: mgreer, sameo, linux-wireless, netdev, linux-kernel
  Cc: martijn.de.gouw, Paul Geurts

The main channel Received Signal Strength Indicator (RSSI) measurement
is used to determine whether an RF field is present or not. RSSI != 0
is interpreted as an RF field being present. This does not take RF noise
and measurement inaccuracy into account, and results in false positives
in the field.

Define a noise level and make sure the RF field is only interpreted as
present when the RSSI is above the noise level.

Fixes: 851ee3cbf850 ("NFC: trf7970a: Don't turn on RF if there is already an RF field")
Signed-off-by: Paul Geurts <paul.geurts@prodrive-technologies.com>
---
 drivers/nfc/trf7970a.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index d17c701c7888..08c27bb438b5 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c
@@ -317,6 +317,7 @@
 #define TRF7970A_RSSI_OSC_STATUS_RSSI_MASK	(BIT(2) | BIT(1) | BIT(0))
 #define TRF7970A_RSSI_OSC_STATUS_RSSI_X_MASK	(BIT(5) | BIT(4) | BIT(3))
 #define TRF7970A_RSSI_OSC_STATUS_RSSI_OSC_OK	BIT(6)
+#define TRF7970A_RSSI_OSC_STATUS_RSSI_NOISE_LEVEL	1
 
 #define TRF7970A_SPECIAL_FCN_REG1_COL_7_6		BIT(0)
 #define TRF7970A_SPECIAL_FCN_REG1_14_ANTICOLL		BIT(1)
@@ -1300,7 +1301,7 @@ static int trf7970a_is_rf_field(struct trf7970a *trf, bool *is_rf_field)
 	if (ret)
 		return ret;
 
-	if (rssi & TRF7970A_RSSI_OSC_STATUS_RSSI_MASK)
+	if ((rssi & TRF7970A_RSSI_OSC_STATUS_RSSI_MASK) > TRF7970A_RSSI_OSC_STATUS_RSSI_NOISE_LEVEL)
 		*is_rf_field = true;
 	else
 		*is_rf_field = false;
-- 
2.39.2


^ permalink raw reply related

* Re: [PATCH] dt-bindings: Fix phandle-array constraints, again
From: Krzysztof Kozlowski @ 2026-04-22 10:22 UTC (permalink / raw)
  To: Rob Herring (Arm)
  Cc: Maarten Lankhorst, Maxime Ripard, Krzysztof Kozlowski,
	Conor Dooley, Ulf Hansson, Stephan Gerhold, Andrew Lunn,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Johannes Berg, Jeff Johnson, Bjorn Helgaas, Lorenzo Pieralisi,
	Krzysztof Wilczyński, Manivannan Sadhasivam, Bjorn Andersson,
	Mathieu Poirier, Sylwester Nawrocki, Mark Brown, Maxime Coquelin,
	Greg Kroah-Hartman, Yang Xiwen, Alex Elder, Chaitanya Chundru,
	Sibi Sankar, Rao Mandadapu, Patrice Chotard, Xu Yang, Peng Fan,
	Thomas Zimmermann, devicetree, linux-kernel, linux-mmc,
	linux-arm-msm, netdev, linux-wireless, ath10k, ath11k, linux-pci,
	linux-remoteproc, linux-sound, linux-spi, linux-usb
In-Reply-To: <20260421195836.1547469-1-robh@kernel.org>

On Tue, Apr 21, 2026 at 02:55:25PM -0500, Rob Herring (Arm) wrote:
> diff --git a/Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml b/Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml
> index b30544410d09..e47e1e09300a 100644
> --- a/Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml
> +++ b/Documentation/devicetree/bindings/net/qcom,bam-dmux.yaml
> @@ -42,7 +42,13 @@ properties:
>      description: State bits used by the AP to signal the modem.
>      items:
>        - description: Power control
> +        items:
> +          - description: Phandle to ???
> +          - description: ???
>        - description: Power control acknowledgment
> +        items:
> +          - description: Phandle to ???
> +          - description: ???
>  

Here and in all cases except qcom,msm8916-mss-pil:

 - description: Phandle to the Shared Memory Point 2 Point device
     handling the communication with a remote processor

 - description: Single bit index to toggle in the value sent to
     the remote processor
   maximum: 32

...

> diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,msm8916-mss-pil.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,msm8916-mss-pil.yaml
> index c179b560572b..3c614cb7ce88 100644
> --- a/Documentation/devicetree/bindings/remoteproc/qcom,msm8916-mss-pil.yaml
> +++ b/Documentation/devicetree/bindings/remoteproc/qcom,msm8916-mss-pil.yaml
> @@ -104,6 +104,9 @@ properties:
>      description: States used by the AP to signal the Hexagon core
>      items:
>        - description: Stop modem
> +        items:
> +          - description: Phandle to ???
> +          - description: ???

 - description: Phandle to the Shared Memory Point 2 Point or Shared
     Memory Manager device handling the communication with a remote
     processor

 - description: Single bit index to toggle in the value sent to
     the remote processor
   maximum: 32


Best regards,
Krzysztof


^ permalink raw reply

* Re: Bug#1130336: [regression] Network failure beyond first connection after 69894e5b4c5e ("netfilter: nft_connlimit: update the count if add was skipped")
From: Fernando Fernandez Mancera @ 2026-04-22 10:32 UTC (permalink / raw)
  To: Thorsten Leemhuis, Alejandro Oliván Alvarez,
	Salvatore Bonaccorso, 1130336
  Cc: Florian Westphal, Pablo Neira Ayuso, Phil Sutter, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	netfilter-devel, coreteam, netdev, linux-kernel, regressions,
	stable
In-Reply-To: <0b8607c8-2d29-4fca-961a-b7a677e968a1@leemhuis.info>

On 4/22/26 11:18 AM, Thorsten Leemhuis wrote:
> Lo! Top-posting on purpose to make this easy to process.
> 
> What happened to this regression? It looks a bit like things stalled and
> fell through the cracks. Or Fernando, did you post a patch like you
> mentioned? I looked for one referring the commit or the reporter, but
> could not find anything -- but maybe I missed it.
> 

Yes, it stalled and fell through the cracks. Let me prepare a fix as I 
mentioned.

Thanks for the reminder Thorsten!

> Ciao, Thorsten
> 
> On 3/19/26 09:59, Fernando Fernandez Mancera wrote:
>> On 3/19/26 9:44 AM, Alejandro Oliván Alvarez wrote:
>>> Hi folks.
>>>
>>> On Wed, 2026-03-18 at 13:49 +0100, Salvatore Bonaccorso wrote:
>>>> Hi Alejandro,
>>>>
>>>> On Sun, Mar 15, 2026 at 02:09:33AM +0100, Fernando Fernandez Mancera
>>>> wrote:
>>>>> On 3/14/26 8:25 PM, Florian Westphal wrote:
>>>>>> Fernando Fernandez Mancera <fmancera@suse.de> wrote:
>>>>>>> On 3/14/26 5:13 PM, Fernando Fernandez Mancera wrote:
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> On 3/14/26 3:03 PM, Salvatore Bonaccorso wrote:
>>>>>>>>> Control: forwarded -1
>>>>>>>>> https://lore.kernel.org/
>>>>>>>>> regressions/177349610461.3071718.4083978280323144323@eldama
>>>>>>>>> r.lan
>>>>>>>>> Control: tags -1 + upstream
>>>>>>>>>
>>>>>>>>> Hi
>>>>>>>>>
>>>>>>>>> In Debian, in https://bugs.debian.org/1130336, Alejandro
>>>>>>>>> reported that
>>>>>>>>> after updates including 69894e5b4c5e ("netfilter:
>>>>>>>>> nft_connlimit:
>>>>>>>>> update the count if add was skipped"), when the following
>>>>>>>>> rule is set
>>>>>>>>>
>>>>>>>>>        iptables -A INPUT -p tcp -m
>>>>>>>>> connlimit --connlimit-above 111 -j
>>>>>>>>> REJECT --reject-with tcp-reset
>>>>>>>>>
>>>>>>>>> connections get stuck accordingly, it can be easily
>>>>>>>>> reproduced by:
>>>>>>>>>
>>>>>>>>> # iptables -A INPUT -p tcp -m connlimit
>>>>>>>>> --connlimit-above 111 -j REJECT
>>>>>>>>> --reject-with tcp-reset
>>>>>>>>> # nft list ruleset
>>>>>>>>> # Warning: table ip filter is managed by iptables-nft, do
>>>>>>>>> not touch!
>>>>>>>>> table ip filter {
>>>>>>>>>             chain INPUT {
>>>>>>>>>                     type filter hook input priority filter;
>>>>>>>>> policy accept;
>>>>>>>>>                     ip protocol tcp xt
>>>>>>>>> match "connlimit" counter packets 0
>>>>>>>>> bytes 0 reject with tcp reset
>>>>>>>>>             }
>>>>>>>>> }
>>>>>>>>> # wget -O /dev/null
>>>>>>>>> https://git.kernel.org/torvalds/t/linux-7.0-
>>>>>>>>> rc3.tar.gz
>>>>>>>>> --2026-03-14 14:53:51--
>>>>>>>>> https://git.kernel.org/torvalds/t/linux-7.0-
>>>>>>>>> rc3.tar.gz
>>>>>>>>> Resolving git.kernel.org
>>>>>>>>> (git.kernel.org)... 172.105.64.184,
>>>>>>>>> 2a01:7e01:e001:937:0:1991:8:25
>>>>>>>>> Connecting to git.kernel.org
>>>>>>>>> (git.kernel.org)|172.105.64.184|:443...
>>>>>>>>> connected.
>>>>>>>>> HTTP request sent, awaiting response... 301 Moved
>>>>>>>>> Permanently
>>>>>>>>> Location:
>>>>>>>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/
>>>>>>>>> linux.git/snapshot/linux-7.0-rc3.tar.gz
>>>>>>>>> [following]
>>>>>>>>> --2026-03-14 14:53:51--
>>>>>>>>> https://git.kernel.org/pub/scm/linux/kernel/ git/torvalds/l
>>>>>>>>> inux.git/snapshot/linux-7.0-rc3.tar.gz
>>>>>>>>> Reusing existing connection to git.kernel.org:443.
>>>>>>>>> HTTP request sent, awaiting response... 200 OK
>>>>>>>>> Length: unspecified [application/x-gzip]
>>>>>>>>> Saving to: ‘/dev/null’
>>>>>>>>>
>>>>>>>>> /dev/null                         [
>>>>>>>>> <=>                    ] 248.03M
>>>>>>>>> 51.9MB/s    in 5.0s
>>>>>>>>>
>>>>>>>>> 2026-03-14 14:53:56 (49.3 MB/s) - ‘/dev/null’ saved
>>>>>>>>> [260080129]
>>>>>>>>>
>>>>>>>>> # wget -O /dev/null
>>>>>>>>> https://git.kernel.org/torvalds/t/linux-7.0-
>>>>>>>>> rc3.tar.gz
>>>>>>>>> --2026-03-14 14:53:58--
>>>>>>>>> https://git.kernel.org/torvalds/t/linux-7.0-
>>>>>>>>> rc3.tar.gz
>>>>>>>>> Resolving git.kernel.org
>>>>>>>>> (git.kernel.org)... 172.105.64.184,
>>>>>>>>> 2a01:7e01:e001:937:0:1991:8:25
>>>>>>>>> Connecting to git.kernel.org
>>>>>>>>> (git.kernel.org)|172.105.64.184|:443...
>>>>>>>>> failed: Connection timed out.
>>>>>>>>> Connecting to git.kernel.org
>>>>>>>>> (git.kernel.org)|
>>>>>>>>> 2a01:7e01:e001:937:0:1991:8:25|:443...
>>>>>>>>> failed: Network is unreachable.
>>>>>>>>>
>>>>>>>>> Before the 69894e5b4c5e ("netfilter: nft_connlimit: update
>>>>>>>>> the count
>>>>>>>>> if add was skipped") commit this worked.
>>>>>>>>>
>>>>>>>>
>>>>>>>> Thanks for the report. I have reproduced
>>>>>>>> this on upstream kernel. I am working on it.
>>>>>>>>
>>>>>>>
>>>>>>> This is what is happening:
>>>>>>>
>>>>>>> 1. The first connection is established and
>>>>>>> tracked, all good. When it finishes, it goes to
>>>>>>> TIME_WAIT state
>>>>>>> 2. The second connection is established, ct is
>>>>>>> confirmed since the beginning, skipping the
>>>>>>> tracking and calling a GC.
>>>>>>> 3. The previously tracked connection is cleaned
>>>>>>> up during GC as TIME_WAIT is considered closed.
>>>>>>
>>>>>> This is stupid.  The fix is to add --syn or use
>>>>>> OUTPUT.  It's not even clear to me what the user wants to achieve
>>>>>> with this rule.
>>>>>>
>>>>>
>>>>> Yes, the ruleset shown does not make sense. Having said this, it
>>>>> could
>>>>> affect to a soft-limit scenario as the one described on the blamed
>>>>> commit..
>>>>
>>>> Alejandro, can you describe what you would like to achieve with the
>>>> specific rule?
>>>>
>>>> Regards,
>>>> Salvatore
>>>
>>> The intended use of that rule was to prevent (limit) a single host from
>>> establishing too many TCP connections to given host (Denial of
>>> Service... particularly on streaming servers).
>>>
>>> I learnt about it in several IPtables guides/howtos (maaaany years
>>> ago!), and never was an issue on itself.
>>> Was it stupid? ... possibly... It 'seemed' to work, or, at least, when
>>> checking iptables -L -v one could see packet counter for the rule
>>> catching some traffic, without ever noticing it being troublesome, so,
>>> at the very least it 'didn't hurt', and, since DoS ever happened over
>>> the years...well, I tended to think it was indeed working the way I
>>> read it did.
>>>
>>> Certainly, I never (the authors of those guides at their time indeed)
>>> thought about the possibility of just targeting the TCP syn.
>>> I have given a try to adding the --syn option to the rule to see the
>>> difference, and well, it is way less disruptive that way, but it still
>>> breaks things (I saw postfix queues hanging, for instance).
>>>
>>
>> The current problem with the ruleset is that it mixes both, incoming and
>> outgoing connections. This should probably use --syn flag so it targets
>> connections established against your host only.
>>
>> Anyway, I am sending a patch fixing this as it makes sense to do it IMO.
>> We just want to understand what is the real use-case and how the ruleset
>> can be improved.
>>
>> In addition, I would recommend you to transition to nftables because it
>> would be ideal for your use-case. With nftables it would be easy to
>> combine this with sets and probably quota expression to limit the usage.
>>
>> What is wrong with the current ruleset? (Even before the blamed
>> commit), if you reach the connlimit limit **ALL** TCP connections will
>> be rejected (including legit ones), I do not think that is what you want
>> to achieve.
>>
>> Thanks,
>> Fernando.
>>
>>> So, I have but screwed the idea of using connlimit anymore anyways.
>>> Sorry for the noise. Lesson learned.
>>>
>>> Cheers!
>>
>>
> 
> 


^ permalink raw reply

* Re: [PATCH net v2] ipv6: addrconf: skip ERRDAD transition when address already DEAD
From: Sabrina Dubroca @ 2026-04-22 10:43 UTC (permalink / raw)
  To: Linmao Li
  Cc: davem, dsahern, edumazet, kuba, pabeni, horms, netdev,
	linux-kernel
In-Reply-To: <20260421075033.1110816-1-lilinmao@kylinos.cn>

2026-04-21, 15:50:33 +0800, Linmao Li wrote:
> addrconf_dad_end() transitions ifp->state from DAD to POSTDAD under
> ifp->lock and releases the lock.  addrconf_dad_failure() takes
> ifp->lock again with the spin_lock_bh() following the
> net_info_ratelimited() duplicate-address log.  A concurrent
> ipv6_del_addr() can acquire the lock in that window, set ifp->state
> to DEAD and run list_del_rcu(&ifp->if_list).

You're pretty much saying that the ifp->state check we did in
addrconf_dad_end before dropping the lock is not valid, so it seems we
should just skip that separate check since it's not doing anything
useful, and move it under the "main" lock we acquire after the
net_info_ratelimited(). There would still be a problem with "we
dropped the lock in the STABLE_PRIVACY block", which your patch
handles.

> addrconf_dad_failure() then overwrites DEAD with ERRDAD at errdad:
> and schedules a new dad_work.  The work calls ipv6_del_addr() again,
> hitting the already-poisoned list entry:
> 
>   general protection fault: 0000 [#1] SMP NOPTI
>   CPU: 4 PID: 217 Comm: kworker/4:1
>   Workqueue: ipv6_addrconf addrconf_dad_work
>   RIP: 0010:ipv6_del_addr+0xe9/0x280
>   RAX: dead000000000122
>   Call Trace:
>    addrconf_dad_stop+0x113/0x140
>    addrconf_dad_work+0x28c/0x430
>    process_one_work+0x1eb/0x3b0
>    worker_thread+0x4d/0x400
>    kthread+0x104/0x140
>    ret_from_fork+0x35/0x40
> 
> Bail out at errdad: when ifp->state is already DEAD. The existing
> in6_ifa_put() releases the reference taken for this invocation.

Mentioning "the existing in6_ifa_put()" is a bit confusing since
you're adding a separate unlock/put/return path.

-- 
Sabrina

^ permalink raw reply

* Re: [PATCH net 00/18] Remove a number of ISA and PCMCIA Ethernet drivers
From: Finn Thain @ 2026-04-22 10:45 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Andrew Lunn, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Jonathan Corbet, Shuah Khan,
	linux-kernel, netdev, linux-doc
In-Reply-To: <20260421-v7-0-0-net-next-driver-removal-v1-v1-0-69517c689d1f@lunn.ch>


On Tue, 21 Apr 2026, Andrew Lunn wrote:

> These old drivers have not been much of a Maintenance burden until 
> recently.

They are not much of a maintenance burden because they are small, mature 
and stable.

> Now there are more newbies using AI and fuzzers finding issues, 
> resulting in more work for Maintainers.

AI helps find issues but AI also helps resolve issues.

> Fixing these old drivers makes little sense, if it is not clear they have 
> users.
> 

Removing these old drivers makes little sense, if the issues were only 
noticed by AI and not by actual users.

^ permalink raw reply

* Re: [PATCH net-deletions] net: remove ax25 and amateur radio (hamradio) subsystem
From: Simon Horman @ 2026-04-22 10:45 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, edumazet, pabeni, andrew+netdev, corbet, skhan,
	federico.vaga, carlos.bilbao, avadhut.naik, alexs, si.yanteng,
	dzm91, 2023002089, tsbogend, dsahern, jani.nikula, mchehab+huawei,
	gregkh, jirislaby, tytso, herbert, ebiggers, johannes.berg, geert,
	pablo, tglx, mashiro.chen, mingo, dqfext, jreuter, sdf, pkshih,
	enelsonmoore, mkl, toke, kees, crossd, jlayton, wangliang74,
	aha310510, takamitz, kuniyu, linux-doc, linux-mips
In-Reply-To: <20260421021824.1293976-1-kuba@kernel.org>

On Mon, Apr 20, 2026 at 07:18:23PM -0700, Jakub Kicinski wrote:
> Remove the amateur radio (AX.25, NET/ROM, ROSE) protocol implementation
> and all associated hamradio device drivers from the kernel tree.
> This set of protocols has long been a huge bug/syzbot magnet,
> and since nobody stepped up to help us deal with the influx
> of the AI-generated bug reports we need to move it out of tree
> to protect our sanity.
> 
> The code is moved to an out-of-tree repo:
> https://github.com/linux-netdev/mod-orphan
> if it's cleaned up and reworked there we can accept it back.
> 
> Minimal stub headers are kept for include/net/ax25.h (AX25_P_IP,
> AX25_ADDR_LEN, ax25_address) and include/net/rose.h (ROSE_ADDR_LEN)
> so that the conditional integration code in arp.c and tun.c continues
> to compile and work when the out-of-tree modules are loaded.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Reviewed-by: Simon Horman <horms@kernel.org>

^ permalink raw reply

* Re: [PATCH net-deletions] caif: remove CAIF NETWORK LAYER
From: Simon Horman @ 2026-04-22 10:48 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, edumazet, pabeni, andrew+netdev, corbet, skhan,
	alexs, si.yanteng, dzm91, linux, mst, jasowang, xuanzhuo,
	eperezma, xu.xin16, wang.yaxin, jiang.kun2, linusw,
	jihed.chaibi.dev, arnd, tytso, jiayuan.chen, gregkh
In-Reply-To: <20260416182829.1440262-1-kuba@kernel.org>

On Thu, Apr 16, 2026 at 11:28:28AM -0700, Jakub Kicinski wrote:
> Remove CAIF (Communication CPU to Application CPU Interface), the
> ST-Ericsson modem protocol. The subsystem has been orphaned since 2013.
> The last meaningful changes from the maintainers were in March 2013:
>   a8c7687bf216 ("caif_virtio: Check that vringh_config is not null")
>   b2273be8d2df ("caif_virtio: Use vringh_notify_enable correctly")
>   0d2e1a2926b1 ("caif_virtio: Introduce caif over virtio")
> 
> Not-so-coincidentally, according to "the Internet" ST-Ericsson officially
> shut down its modem joint venture in Aug 2013.
> 
> If anyone is using this code please yell!
> 
> In the 13 years since, the code has accumulated 200 non-merge commits,
> of which 71 were cross-tree API changes, 21 carried Fixes: tags, and
> the remaining ~110 were cleanups, doc conversions, treewide refactors,
> and one partial removal (caif_hsi, ca75bcf0a83b).
> 
> We are still getting fixes to this code, in the last 10 days there were
> 3 reports on security@ about CAIF that I have been CCed on.
> 
> UAPI constants (AF_CAIF, ARPHRD_CAIF, N_CAIF, VIRTIO_ID_CAIF) and the
> SELinux classmap entry are intentionally kept for ABI stability.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> I think we should accumulate such patches over the coming days on a separate
> branch. CAIF is a no-brainer IMO but other removals may be more controversial.

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* Re: [PATCH net-deletions v2] net: remove unused ATM protocols and legacy ATM device drivers
From: Simon Horman @ 2026-04-22 10:53 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, edumazet, pabeni, andrew+netdev, corbet, skhan,
	linux, tsbogend, maddy, mpe, npiggin, chleroy, 3chas3, razor,
	idosch, jani.nikula, mchehab+huawei, tytso, herbert, geert,
	ebiggers, johannes.berg, jonathan.cameron, kees, kuniyu,
	fourier.thomas, andriy.shevchenko, rdunlap, akpm, linux-doc,
	linux-mips, linuxppc-dev, bridge, dwmw2
In-Reply-To: <20260422041846.2035118-1-kuba@kernel.org>

On Tue, Apr 21, 2026 at 09:18:44PM -0700, Jakub Kicinski wrote:
> Remove the ATM protocol modules and PCI/SBUS ATM device drivers
> that are no longer in active use.
> 
> The ATM core protocol stack, PPPoATM, BR2684, and USB DSL modem
> drivers (drivers/usb/atm/) are retained in-tree to maintain PPP
> over ATM (PPPoA) and PPPoE-over-BR2684 support for DSL connections.
> 
> Removed ATM protocol modules:
>  - net/atm/clip.c - Classical IP over ATM (RFC 2225)
>  - net/atm/lec.c - LAN Emulation Client (LANE)
>  - net/atm/mpc.c, mpoa_caches.c, mpoa_proc.c - Multi-Protocol Over ATM
> 
> Removed PCI/SBUS ATM device drivers (drivers/atm/):
>  - adummy, atmtcp - software/testing ATM devices
>  - eni - Efficient Networks ENI155P (OC-3, ~1995)
>  - fore200e - FORE Systems 200E PCI/SBUS (OC-3, ~1999)
>  - he - ForeRunner HE (OC-3/OC-12, ~2000)
>  - idt77105 - IDT 77105 25 Mbps ATM PHY
>  - idt77252 - IDT 77252 NICStAR II (OC-3, ~2000)
>  - iphase - Interphase ATM PCI (OC-3/DS3/E3)
>  - lanai - Efficient Networks Speedstream 3010
>  - nicstar - IDT 77201 NICStAR (155/25 Mbps, ~1999)
>  - solos-pci - Traverse Technologies ADSL2+ PCI
>  - suni - PMC S/UNI SONET PHY library
> 
> Also clean up references in:
>  - net/bridge/ - remove ATM LANE hook (br_fdb_test_addr_hook,
>    br_fdb_test_addr)
>  - net/core/dev.c - remove br_fdb_test_addr_hook export
>  - defconfig files - remove ATM driver config options
> 
> The removed code is moved to an out-of-tree module package (mod-orphan).
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
> v2:
>  - keep BR2684
>  - correct the claim that Traverse Technologies is defunct,
>    I'm still deleting the solos driver, chances are nobody uses it.
>    Easy enough to revert back in since core is still around.
>    The guiding principle is to keep USB modems and delete
>    the rest as USB ADSL2+ CPEs were most popular historically.
> v1: https://lore.kernel.org/20260421021943.1295109-1-kuba@kernel.org

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* Re: [PATCH net-deletions] net: remove ISDN subsystem and Bluetooth CMTP
From: Simon Horman @ 2026-04-22 10:55 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem, netdev, edumazet, pabeni, andrew+netdev, corbet, skhan,
	marcel, luiz.dentz, mchehab+huawei, jani.nikula, gregkh, demarchi,
	rdunlap, justonli, ivecera, jonathan.cameron, kees,
	marco.crivellari, ferr.lambarginio, nihaal, mingo, tglx, linmq006,
	linux-doc, linux-bluetooth
In-Reply-To: <20260421022108.1299678-1-kuba@kernel.org>

On Mon, Apr 20, 2026 at 07:21:07PM -0700, Jakub Kicinski wrote:
> Remove the ISDN (mISDN, CAPI) subsystem and Bluetooth CMTP protocol
> from the kernel tree.
> 
> ISDN is a pretty old technology and it's unclear whether anyone still
> uses it. I went over the last few years of git history and all the
> commits are either tree-wide conversions or syzbot/static analyzer
> fixes.
> 
> When we discussed removal in the past IIRC there were some concerns
> about ISDN still being used in parts of Germany. Unfortunately, the
> code base is quite old, none of the current maintainers are familiar
> with it and AI tools will have a field day finding bugs here.
> 
> Delete this code and preserve it in an out-of-tree repository
> for any remaining users:
> https://github.com/linux-netdev/mod-orphan
> 
> UAPI constants AF_ISDN/PF_ISDN and the SELinux isdn_socket class
> are preserved for ABI stability, but the rest of uAPI is removed.
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Reviewed-by: Simon Horman <horms@kernel.org>


^ permalink raw reply

* [PATCH v2] net/intel: Replace manual array size calculation with ARRAY_SIZE
From: Jakub Raczynski @ 2026-04-22 10:57 UTC (permalink / raw)
  To: error27
  Cc: netdev, kuba, przemyslaw.kitszel, anthony.l.nguyen,
	kernel-janitors, Jakub Raczynski
In-Reply-To: <aeeFh1zQqhVysvxI@stanley.mountain>

There are still places in the code where manual calculation of array size
exists, but it is good to enforce usage of a single macro throughout the
code as it makes the code a bit more readable.
While at it, beautify the surrounding condition by reversing the check and
removing the unnecessary cast.

Signed-off-by: Jakub Raczynski <j.raczynski@samsung.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq.h | 2 +-
 drivers/net/ethernet/intel/iavf/iavf_adminq.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
index 1be97a3a86ce..dcf3baec7b73 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
@@ -109,7 +109,7 @@ static inline int i40e_aq_rc_to_posix(int aq_ret, int aq_rc)
 		-EFBIG,      /* I40E_AQ_RC_EFBIG */
 	};
 
-	if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0]))))
+	if (aq_rc >= ARRAY_SIZE(aq_to_posix))
 		return -ERANGE;
 
 	return aq_to_posix[aq_rc];
diff --git a/drivers/net/ethernet/intel/iavf/iavf_adminq.h b/drivers/net/ethernet/intel/iavf/iavf_adminq.h
index bbf5c4b3a2ae..dd2f61172157 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_adminq.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_adminq.h
@@ -113,7 +113,7 @@ static inline int iavf_aq_rc_to_posix(int aq_ret, int aq_rc)
 	if (aq_ret == IAVF_ERR_ADMIN_QUEUE_TIMEOUT)
 		return -EAGAIN;
 
-	if (!((u32)aq_rc < (sizeof(aq_to_posix) / sizeof((aq_to_posix)[0]))))
+	if (aq_rc >= ARRAY_SIZE(aq_to_posix))
 		return -ERANGE;
 
 	return aq_to_posix[aq_rc];
-- 
2.34.1


^ permalink raw reply related

* [PATCH] ipv6: udp: fix memory leak in udpv6_sendmsg error path
From: Mingyu Wang @ 2026-04-22 10:58 UTC (permalink / raw)
  To: willemdebruijn.kernel, davem, dsahern, edumazet, kuba, pabeni
  Cc: horms, netdev, linux-kernel, Mingyu Wang

During fuzzing with failslab enabled, a memory leak was observed in the
IPv6 UDP send path.

When sending via the lockless fast path (!corkreq), udpv6_sendmsg()
calls ip6_make_skb() and assumes that the routing entry (dst_entry)
reference has been stolen by the callee. However, if ip6_make_skb()
fails early (e.g., due to an ENOMEM from memory allocation failure),
it returns an error pointer without consuming the dst reference.

Since udpv6_sendmsg() unconditionally jumps to the 'out_no_dst' label,
the unconsumed dst_entry is never released, resulting in a memory leak.

Fix this by explicitly calling dst_release(dst) when ip6_make_skb()
returns an error.

Signed-off-by: Mingyu Wang <25181214217@stu.xidian.edu.cn>
---
 net/ipv6/udp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 15e032194ecc..b83ecfd729af 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1706,8 +1706,11 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 				   dst_rt6_info(dst),
 				   msg->msg_flags, &cork);
 		err = PTR_ERR(skb);
-		if (!IS_ERR_OR_NULL(skb))
+		if (!IS_ERR_OR_NULL(skb)) {
 			err = udp_v6_send_skb(skb, fl6, &cork.base);
+		} else {
+			dst_release(dst);
+		}
 		/* ip6_make_skb steals dst reference */
 		goto out_no_dst;
 	}
-- 
2.34.1

^ permalink raw reply related

* Re: [PATCH net v3 1/1] net: hsr: limit node table growth
From: Sebastian Andrzej Siewior @ 2026-04-22 10:58 UTC (permalink / raw)
  To: Felix Maurer
  Cc: Ren Wei, netdev, davem, edumazet, kuba, pabeni, horms, kees,
	kexinsun, luka.gejak, Arvid.Brodin, m-karicheri2, yuantan098,
	yifanwucs, tomapufckgml, bird, xuyuqiabc, royenheart
In-Reply-To: <aeiYpkiWUD5MtGEB@thinkpad>

On 2026-04-22 11:45:38 [+0200], Felix Maurer wrote:
> > I don't think the node count exceeds 100 in production. So having a
> > counter which is incremented while adding to the list and decremented
> > while removing items from the list would optimize the "worst case". So
> > instead of traversing the list with 1000 we would just give up.
> 
> The counter is what I had in mind. I agree that allocating under the
> lock isn't what we want.
> 
> I'd argue counting through the whole list is the normal case.

yeah but counting here is just a register increment which is cheap. 

> hsr_add_node() is only called after the node table has been searched
> already (without the lock). Here we go through the whole list again
> under the lock to prevent TOCTOU-type situations.
> 
> I agree that, overall, it would be optimizing the worst case, but I
> think it may be worth it to prevent the memory allocations and walking
> the whole list. But I'd go along with the (current) on-the-fly counting
> as well.

Yeah. But then you have to manage the counter on add and removal just
for this "we have too many nodes" case. And theoretically you would have
to hold the list_lock while checking the counter because nodes might be
added on both sides in the RX path (unless you check early lockless &
optimistic and then again before adding under the lock).

So overall this looks simpler.

> Thanks,
>    Felix

Sebastian

^ permalink raw reply

* Re: [syzbot] [net?] kernel BUG in pn_socket_autobind
From: syzbot @ 2026-04-22 11:16 UTC (permalink / raw)
  To: courmisch, davem, edumazet, horms, kuba, linux-kernel, netdev,
	pabeni, syzkaller-bugs
In-Reply-To: <69e79944.a00a0220.17a17.001a.GAE@google.com>

syzbot has found a reproducer for the following issue on:

HEAD commit:    6596a02b2078 Merge tag 'drm-next-2026-04-22' of https://gi..
git tree:       upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=15ca74ce580000
kernel config:  https://syzkaller.appspot.com/x/.config?x=3e19fa1907a3dfda
dashboard link: https://syzkaller.appspot.com/bug?extid=b3c0e6a240078433c42b
compiler:       gcc (Debian 14.2.0-19) 14.2.0, GNU ld (GNU Binutils for Debian) 2.44
syz repro:      https://syzkaller.appspot.com/x/repro.syz?x=17330cce580000
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=107ed1ba580000

Downloadable assets:
disk image (non-bootable): https://storage.googleapis.com/syzbot-assets/d900f083ada3/non_bootable_disk-6596a02b.raw.xz
vmlinux: https://storage.googleapis.com/syzbot-assets/1e78cc4623b5/vmlinux-6596a02b.xz
kernel image: https://storage.googleapis.com/syzbot-assets/7ae78b19bafa/bzImage-6596a02b.xz

IMPORTANT: if you fix the issue, please add the following tag to the commit:
Reported-by: syzbot+b3c0e6a240078433c42b@syzkaller.appspotmail.com

------------[ cut here ]------------
kernel BUG at net/phonet/socket.c:213!
Oops: invalid opcode: 0000 [#1] SMP KASAN NOPTI
CPU: 0 UID: 0 PID: 6116 Comm: syz.0.17 Not tainted syzkaller #0 PREEMPT(full) 
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
RIP: 0010:pn_socket_autobind net/phonet/socket.c:213 [inline]
RIP: 0010:pn_socket_autobind+0x14c/0x170 net/phonet/socket.c:202
Code: 00 00 00 00 48 8b 44 24 58 65 48 2b 05 55 4d 47 09 75 2a 48 83 c4 60 89 d8 5b 5d 41 5c 41 5d e9 1a f8 d0 00 e8 15 78 3c f7 90 <0f> 0b e8 cd 30 aa f7 eb 9e e8 26 31 aa f7 e9 6c ff ff ff e8 7c b8
RSP: 0018:ffffc90003d5fa30 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff8acd028a
RDX: ffff88802b48ca00 RSI: ffffffff8acd02cb RDI: ffff88802b48ca00
RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff920007abf46
R13: dffffc0000000000 R14: 1ffff920007abf61 R15: ffffc90003d5fe48
FS:  00007f523dc816c0(0000) GS:ffff8880d62db000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fdf9b231e9c CR3: 000000004a755000 CR4: 0000000000352ef0
Call Trace:
 <TASK>
 pn_socket_sendmsg+0x43/0xe0 net/phonet/socket.c:421
 sock_sendmsg_nosec net/socket.c:787 [inline]
 __sock_sendmsg net/socket.c:802 [inline]
 ____sys_sendmsg+0x9e1/0xb70 net/socket.c:2698
 ___sys_sendmsg+0x190/0x1e0 net/socket.c:2752
 __sys_sendmsg+0x170/0x220 net/socket.c:2784
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0x10b/0xf80 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7f523cd9c819
Code: ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 e8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007f523dc81028 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f523d015fa0 RCX: 00007f523cd9c819
RDX: 0000000004044004 RSI: 0000200000000200 RDI: 000000000000000a
RBP: 00007f523ce32c91 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007f523d016038 R14: 00007f523d015fa0 R15: 00007ffe40f56cd8
 </TASK>
Modules linked in:
---[ end trace 0000000000000000 ]---
RIP: 0010:pn_socket_autobind net/phonet/socket.c:213 [inline]
RIP: 0010:pn_socket_autobind+0x14c/0x170 net/phonet/socket.c:202
Code: 00 00 00 00 48 8b 44 24 58 65 48 2b 05 55 4d 47 09 75 2a 48 83 c4 60 89 d8 5b 5d 41 5c 41 5d e9 1a f8 d0 00 e8 15 78 3c f7 90 <0f> 0b e8 cd 30 aa f7 eb 9e e8 26 31 aa f7 e9 6c ff ff ff e8 7c b8
RSP: 0018:ffffc90003d5fa30 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffffffff8acd028a
RDX: ffff88802b48ca00 RSI: ffffffff8acd02cb RDI: ffff88802b48ca00
RBP: 0000000000000000 R08: 0000000000000003 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 1ffff920007abf46
R13: dffffc0000000000 R14: 1ffff920007abf61 R15: ffffc90003d5fe48
FS:  00007f523dc816c0(0000) GS:ffff8880d62db000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fdf9b231e9c CR3: 000000004a755000 CR4: 0000000000352ef0


---
If you want syzbot to run the reproducer, reply with:
#syz test: git://repo/address.git branch-or-commit-hash
If you attach or paste a git patch, syzbot will apply it before testing.

^ permalink raw reply

* [PATCH 0/5] io_uring/zcrx: add CQE based notifications and stats reporting
From: Clément Léger @ 2026-04-22 11:25 UTC (permalink / raw)
  To: io-uring, Pavel Begunkov, Jens Axboe
  Cc: Clément Léger, linux-doc, linux-kernel, linux-kselftest,
	netdev, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Simon Horman, Jonathan Corbet, Shuah Khan,
	Vishwanath Seshagiri

The zcrx path can encounter various conditions that lead to internal
fallbacks or errors. These errors can have a large impact on performance
and functionality but are not yet being reported to the user, who
is then unable to take action.

This series addresses this problem by adding a new notification system
paired with a statistics structure. The notification system currently
reports out-of-buffer conditions and packets that fall back to copy. The
statistics structure reports the number and total size of packets that were
copied rather than received via the zero-copy path.

The out of buffer notification allows the user to actually adjust the
buffer sizing when registering zcrx support for the ifq. Some future
work could allow the user to add more memory on the fly to the pool so
the page allocator doesn't run out of memory.

This series can be tested using the included kselftest modification and
using the liburing series that updates headers and tests/examples so
that it uses notifications and statistics.

Clément Léger (4):
  io_uring/zcrx: notify user on frag copy fallback
  io_uring/zcrx: add shared-memory notification statistics
  Documentation: networking: document zcrx notifications and statistics
  selftests: iou-zcrx: add notification and stats test for zcrx

Pavel Begunkov (1):
  io_uring/zcrx: notify user when out of buffers

 Documentation/networking/iou-zcrx.rst         | 106 ++++++++++++
 include/uapi/linux/io_uring/query.h           |  12 ++
 include/uapi/linux/io_uring/zcrx.h            |  34 +++-
 io_uring/query.c                              |  14 ++
 io_uring/zcrx.c                               | 151 +++++++++++++++++-
 io_uring/zcrx.h                               |  13 +-
 .../selftests/drivers/net/hw/iou-zcrx.c       | 112 +++++++++++--
 .../selftests/drivers/net/hw/iou-zcrx.py      |  49 +++++-
 8 files changed, 475 insertions(+), 16 deletions(-)

-- 
Clément Léger

^ permalink raw reply

* [PATCH 1/5] io_uring/zcrx: notify user when out of buffers
From: Clément Léger @ 2026-04-22 11:25 UTC (permalink / raw)
  To: io-uring, Pavel Begunkov, Jens Axboe
  Cc: linux-doc, linux-kernel, linux-kselftest, netdev, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Simon Horman,
	Jonathan Corbet, Shuah Khan, Vishwanath Seshagiri,
	Vishwanath Seshagiri
In-Reply-To: <20260422112522.3316660-1-cleger@meta.com>

From: Pavel Begunkov <asml.silence@gmail.com>

There are currently no easy ways for the user to know if zcrx is out of
buffers and page pool fails to allocate. Add uapi for zcrx to communicate
it back.

It's implemented as a separate CQE, which for now is posted to the creator
ctx. To use it, on registration the user space needs to pass an instance
of struct zcrx_notification_desc, which tells the kernel the user_data
for resulting CQEs and which event types are expected / allowed.

When an allowed event happens, zcrx will post a CQE containing the
specified user_data, and lower bits of cqe->res will be set to the event
mask. Before the kernel could post another notification of the given
type, the user needs to acknowledge that it processed the previous one
by issuing IORING_REGISTER_ZCRX_CTRL with ZCRX_CTRL_ARM_NOTIFICATION.

The only notification type the patch implements so far is
ZCRX_NOTIF_NO_BUFFERS. Next commit adds copy fallback signaling.

Co-developed-by: Vishwanath Seshagiri <vishs@meta.com>
Signed-off-by: Vishwanath Seshagiri <vishs@meta.com>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 include/uapi/linux/io_uring/zcrx.h | 22 ++++++-
 io_uring/zcrx.c                    | 98 +++++++++++++++++++++++++++++-
 io_uring/zcrx.h                    | 11 +++-
 3 files changed, 128 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/io_uring/zcrx.h b/include/uapi/linux/io_uring/zcrx.h
index 5ce02c7a6096..b8596d7d47b6 100644
--- a/include/uapi/linux/io_uring/zcrx.h
+++ b/include/uapi/linux/io_uring/zcrx.h
@@ -65,6 +65,18 @@ enum zcrx_features {
 	 * value in struct io_uring_zcrx_ifq_reg::rx_buf_len.
 	 */
 	ZCRX_FEATURE_RX_PAGE_SIZE	= 1 << 0,
+	ZCRX_FEATURE_NOTIFICATION	= 1 << 1,
+};
+
+enum zcrx_notification_type {
+	ZCRX_NOTIF_NO_BUFFERS = 1 << 0,
+};
+
+struct zcrx_notification_desc {
+	__u64	user_data;
+	__u32	type_mask;
+	__u32	__resv1;
+	__u64	__resv2[10];
 };
 
 /*
@@ -82,12 +94,14 @@ struct io_uring_zcrx_ifq_reg {
 	struct io_uring_zcrx_offsets offsets;
 	__u32	zcrx_id;
 	__u32	rx_buf_len;
-	__u64	__resv[3];
+	__u64	notif_desc; /* see struct zcrx_notification_desc */
+	__u64	__resv[2];
 };
 
 enum zcrx_ctrl_op {
 	ZCRX_CTRL_FLUSH_RQ,
 	ZCRX_CTRL_EXPORT,
+	ZCRX_CTRL_ARM_NOTIFICATION,
 
 	__ZCRX_CTRL_LAST,
 };
@@ -101,6 +115,11 @@ struct zcrx_ctrl_export {
 	__u32 		__resv1[11];
 };
 
+struct zcrx_ctrl_arm_notif {
+	__u32		type_mask;
+	__u32		__resv[11];
+};
+
 struct zcrx_ctrl {
 	__u32	zcrx_id;
 	__u32	op; /* see enum zcrx_ctrl_op */
@@ -109,6 +128,7 @@ struct zcrx_ctrl {
 	union {
 		struct zcrx_ctrl_export		zc_export;
 		struct zcrx_ctrl_flush_rq	zc_flush;
+		struct zcrx_ctrl_arm_notif	zc_arm_notif;
 	};
 };
 
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 9a83d7eb4210..35ca28cb6583 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -44,6 +44,16 @@ static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *nio
 	return container_of(owner, struct io_zcrx_area, nia);
 }
 
+static bool zcrx_set_ring_ctx(struct io_zcrx_ifq *zcrx, struct io_ring_ctx *ctx)
+{
+	guard(spinlock_bh)(&zcrx->ctx_lock);
+	if (zcrx->master_ctx)
+		return false;
+	percpu_ref_get(&ctx->refs);
+	zcrx->master_ctx = ctx;
+	return true;
+}
+
 static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
 {
 	struct io_zcrx_area *area = io_zcrx_iov_to_area(niov);
@@ -531,6 +541,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx)
 
 	ifq->if_rxq = -1;
 	spin_lock_init(&ifq->rq.lock);
+	spin_lock_init(&ifq->ctx_lock);
 	mutex_init(&ifq->pp_lock);
 	refcount_set(&ifq->refs, 1);
 	refcount_set(&ifq->user_refs, 1);
@@ -585,6 +596,11 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 	if (ifq->dev)
 		put_device(ifq->dev);
 
+	scoped_guard(spinlock_bh, &ifq->ctx_lock) {
+		if (ifq->master_ctx)
+			percpu_ref_put(&ifq->master_ctx->refs);
+	}
+
 	io_free_rbuf_ring(ifq);
 	mutex_destroy(&ifq->pp_lock);
 	kfree(ifq);
@@ -738,6 +754,8 @@ static int import_zcrx(struct io_ring_ctx *ctx,
 		return -EINVAL;
 	if (reg->if_rxq || reg->rq_entries || reg->area_ptr || reg->region_ptr)
 		return -EINVAL;
+	if (reg->notif_desc)
+		return -EINVAL;
 	if (reg->flags & ~ZCRX_REG_IMPORT)
 		return -EINVAL;
 
@@ -826,6 +844,7 @@ static int zcrx_register_netdev(struct io_zcrx_ifq *ifq,
 int io_register_zcrx(struct io_ring_ctx *ctx,
 		     struct io_uring_zcrx_ifq_reg __user *arg)
 {
+	struct zcrx_notification_desc notif;
 	struct io_uring_zcrx_area_reg area;
 	struct io_uring_zcrx_ifq_reg reg;
 	struct io_uring_region_desc rd;
@@ -869,10 +888,22 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
 	if (copy_from_user(&area, u64_to_user_ptr(reg.area_ptr), sizeof(area)))
 		return -EFAULT;
 
+	memset(&notif, 0, sizeof(notif));
+	if (reg.notif_desc && copy_from_user(&notif, u64_to_user_ptr(reg.notif_desc),
+					     sizeof(notif)))
+		return -EFAULT;
+	if (notif.type_mask & ~ZCRX_NOTIF_TYPE_MASK)
+		return -EINVAL;
+	if (notif.__resv1 || !mem_is_zero(&notif.__resv2, sizeof(notif.__resv2)))
+		return -EINVAL;
+
 	ifq = io_zcrx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
 
+	ifq->notif_data = notif.user_data;
+	ifq->allowed_notif_mask = notif.type_mask;
+
 	if (ctx->user) {
 		get_uid(ctx->user);
 		ifq->user = ctx->user;
@@ -923,6 +954,9 @@ int io_register_zcrx(struct io_ring_ctx *ctx,
 		ret = -EFAULT;
 		goto err;
 	}
+
+	if (notif.type_mask)
+		zcrx_set_ring_ctx(ifq, ctx);
 	return 0;
 err:
 	scoped_guard(mutex, &ctx->mmap_lock)
@@ -1089,6 +1123,46 @@ static unsigned io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *if
 	return allocated;
 }
 
+static void zcrx_notif_tw(struct io_tw_req tw_req, io_tw_token_t tw)
+{
+	struct io_kiocb *req = tw_req.req;
+	struct io_ring_ctx *ctx = req->ctx;
+
+	io_post_aux_cqe(ctx, req->cqe.user_data, req->cqe.res, 0);
+	percpu_ref_put(&ctx->refs);
+	kfree_rcu(req, rcu_head);
+}
+
+static void zcrx_send_notif(struct io_zcrx_ifq *ifq, u32 type_mask)
+{
+	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO;
+	struct io_kiocb *req;
+
+	if (!(type_mask & ifq->allowed_notif_mask))
+		return;
+
+	guard(spinlock_bh)(&ifq->ctx_lock);
+	if (!ifq->master_ctx)
+		return;
+	if (type_mask & ifq->fired_notifs)
+		return;
+
+	req = kmem_cache_alloc(req_cachep, gfp);
+	if (unlikely(!req))
+		return;
+
+	ifq->fired_notifs |= type_mask;
+
+	req->opcode = IORING_OP_NOP;
+	req->cqe.user_data = ifq->notif_data;
+	req->cqe.res = type_mask;
+	req->ctx = ifq->master_ctx;
+	percpu_ref_get(&req->ctx->refs);
+	req->tctx = NULL;
+	req->io_task_work.func = zcrx_notif_tw;
+	io_req_task_work_add(req);
+}
+
 static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp)
 {
 	struct io_zcrx_ifq *ifq = io_pp_to_ifq(pp);
@@ -1105,8 +1179,10 @@ static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp)
 		goto out_return;
 
 	allocated = io_zcrx_refill_slow(pp, ifq, netmems, to_alloc);
-	if (!allocated)
+	if (!allocated) {
+		zcrx_send_notif(ifq, ZCRX_NOTIF_NO_BUFFERS);
 		return 0;
+	}
 out_return:
 	zcrx_sync_for_device(pp, ifq, netmems, allocated);
 	allocated--;
@@ -1255,12 +1331,30 @@ static int zcrx_flush_rq(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx,
 	return 0;
 }
 
+static int zcrx_arm_notif(struct io_ring_ctx *ctx, struct io_zcrx_ifq *zcrx,
+			  struct zcrx_ctrl *ctrl)
+{
+	const struct zcrx_ctrl_arm_notif *an = &ctrl->zc_arm_notif;
+
+	if (an->type_mask & ~ZCRX_NOTIF_TYPE_MASK)
+		return -EINVAL;
+	if (!mem_is_zero(&an->__resv, sizeof(an->__resv)))
+		return -EINVAL;
+
+	guard(spinlock_bh)(&zcrx->ctx_lock);
+	if (an->type_mask & ~zcrx->fired_notifs)
+		return -EINVAL;
+	zcrx->fired_notifs &= ~an->type_mask;
+	return 0;
+}
+
 int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 {
 	struct zcrx_ctrl ctrl;
 	struct io_zcrx_ifq *zcrx;
 
 	BUILD_BUG_ON(sizeof(ctrl.zc_export) != sizeof(ctrl.zc_flush));
+	BUILD_BUG_ON(sizeof(ctrl.zc_export) != sizeof(ctrl.zc_arm_notif));
 
 	if (nr_args)
 		return -EINVAL;
@@ -1278,6 +1372,8 @@ int io_zcrx_ctrl(struct io_ring_ctx *ctx, void __user *arg, unsigned nr_args)
 		return zcrx_flush_rq(ctx, zcrx, &ctrl);
 	case ZCRX_CTRL_EXPORT:
 		return zcrx_export(ctx, zcrx, &ctrl, arg);
+	case ZCRX_CTRL_ARM_NOTIFICATION:
+		return zcrx_arm_notif(ctx, zcrx, &ctrl);
 	}
 
 	return -EOPNOTSUPP;
diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h
index 75e0a4e6ef6e..3ddebed06d57 100644
--- a/io_uring/zcrx.h
+++ b/io_uring/zcrx.h
@@ -9,7 +9,9 @@
 #include <net/net_trackers.h>
 
 #define ZCRX_SUPPORTED_REG_FLAGS	(ZCRX_REG_IMPORT | ZCRX_REG_NODEV)
-#define ZCRX_FEATURES			(ZCRX_FEATURE_RX_PAGE_SIZE)
+#define ZCRX_FEATURES			(ZCRX_FEATURE_RX_PAGE_SIZE |\
+					 ZCRX_FEATURE_NOTIFICATION)
+#define ZCRX_NOTIF_TYPE_MASK		(ZCRX_NOTIF_NO_BUFFERS)
 
 struct io_zcrx_mem {
 	unsigned long			size;
@@ -72,6 +74,13 @@ struct io_zcrx_ifq {
 	 */
 	struct mutex			pp_lock;
 	struct io_mapped_region		rq_region;
+
+	/* Locks the access to notification context data */
+	spinlock_t			ctx_lock;
+	struct io_ring_ctx		*master_ctx;
+	u32				allowed_notif_mask;
+	u32				fired_notifs;
+	u64				notif_data;
 };
 
 #if defined(CONFIG_IO_URING_ZCRX)
-- 
2.52.0


^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox