Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 0/3] net: add retry mechanism to ndo_set_rx_mode_async
@ 2026-05-27  1:41 Stanislav Fomichev
  2026-05-27  1:41 ` [PATCH net-next 1/3] net: change ndo_set_rx_mode_async return type to int Stanislav Fomichev
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2026-05-27  1:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, kuba, pabeni, Zijing Yin

Original async ndo_set_rx_mode work left one place where we do netdev_WARN
in response to a ENOMEM. The intent was to see whether actual real
users can hit that (adding uc/mc under memory pressure seems like a
very unlikely thing to do). However, it was quickly triggered by
syzbot's failslab. Add a retry mechanism and downgrade netdev_WARN
to netdev_err. The retry logic is a typical exponential backoff:
1, 2, 4, 8 seconds, 15 in total, hopefully enough for a system to resolve
memory pressure.

Link: https://lore.kernel.org/netdev/20260416185712.2155425-1-sdf@fomichev.me/
Link: https://lore.kernel.org/netdev/20260519095557.3749407-1-yzjaurora@gmail.com/
Cc: Zijing Yin <yzjaurora@gmail.com>

Stanislav Fomichev (3):
  net: change ndo_set_rx_mode_async return type to int
  net: add retry mechanism to ndo_set_rx_mode_async
  bnxt: convert to core rx_mode retry mechanism

 drivers/infiniband/ulp/ipoib/ipoib_main.c     |  9 +--
 drivers/net/dummy.c                           |  7 ++-
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 46 ++++++---------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  2 -
 drivers/net/ethernet/intel/iavf/iavf_main.c   | 10 +++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c |  8 ++-
 .../net/ethernet/meta/fbnic/fbnic_netdev.c    |  8 ++-
 drivers/net/netdevsim/netdev.c                |  7 ++-
 drivers/net/netkit.c                          |  7 ++-
 include/linux/netdevice.h                     | 16 ++++--
 net/core/dev.c                                |  7 ++-
 net/core/dev.h                                |  2 +
 net/core/dev_addr_lists.c                     | 56 ++++++++++++++++++-
 13 files changed, 124 insertions(+), 61 deletions(-)

-- 
2.53.0-Meta


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net-next 1/3] net: change ndo_set_rx_mode_async return type to int
  2026-05-27  1:41 [PATCH net-next 0/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
@ 2026-05-27  1:41 ` Stanislav Fomichev
  2026-05-27  1:41 ` [PATCH net-next 2/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
  2026-05-27  1:41 ` [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism Stanislav Fomichev
  2 siblings, 0 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2026-05-27  1:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, kuba, pabeni

Change the return type of ndo_set_rx_mode_async from void to int to
allow drivers to report failures back to the core stack. This is a
prerequisite for adding retry logic in the core when drivers fail to
program RX filters (e.g. bnxt VF when PF is unavailable).

All existing implementations return 0 for now, maintaining current
behavior.

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c         |  9 +++++----
 drivers/net/dummy.c                               |  7 ++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         | 10 ++++++----
 drivers/net/ethernet/intel/iavf/iavf_main.c       | 10 +++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c |  8 +++++---
 drivers/net/ethernet/meta/fbnic/fbnic_netdev.c    |  8 +++++---
 drivers/net/netdevsim/netdev.c                    |  7 ++++---
 drivers/net/netkit.c                              |  7 ++++---
 include/linux/netdevice.h                         | 11 ++++++-----
 9 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 3e1e1e861739..16a015b67206 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1297,18 +1297,19 @@ static int ipoib_hard_header(struct sk_buff *skb,
 	return IPOIB_HARD_LEN;
 }
 
-static void ipoib_set_rx_mode_async(struct net_device *dev,
-				    struct netdev_hw_addr_list *uc,
-				    struct netdev_hw_addr_list *mc)
+static int ipoib_set_rx_mode_async(struct net_device *dev,
+				   struct netdev_hw_addr_list *uc,
+				   struct netdev_hw_addr_list *mc)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
 
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
 		ipoib_dbg(priv, "IPOIB_FLAG_OPER_UP not set");
-		return;
+		return 0;
 	}
 
 	queue_work(priv->wq, &priv->restart_task);
+	return 0;
 }
 
 static int ipoib_get_iflink(const struct net_device *dev)
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index f6732eab5923..36a4e85c6668 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -47,10 +47,11 @@
 static int numdummies = 1;
 
 /* fake multicast ability */
-static void set_multicast_list(struct net_device *dev,
-			       struct netdev_hw_addr_list *uc,
-			       struct netdev_hw_addr_list *mc)
+static int set_multicast_list(struct net_device *dev,
+			      struct netdev_hw_addr_list *uc,
+			      struct netdev_hw_addr_list *mc)
 {
+	return 0;
 }
 
 static void dummy_get_stats64(struct net_device *dev,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index d4f93e62f583..8459a21cf0a9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -13679,9 +13679,9 @@ static bool bnxt_uc_list_updated(struct bnxt *bp,
 	return false;
 }
 
-static void bnxt_set_rx_mode(struct net_device *dev,
-			     struct netdev_hw_addr_list *uc,
-			     struct netdev_hw_addr_list *mc)
+static int bnxt_set_rx_mode(struct net_device *dev,
+			    struct netdev_hw_addr_list *uc,
+			    struct netdev_hw_addr_list *mc)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	struct bnxt_vnic_info *vnic;
@@ -13690,7 +13690,7 @@ static void bnxt_set_rx_mode(struct net_device *dev,
 	u32 mask;
 
 	if (!test_bit(BNXT_STATE_OPEN, &bp->state))
-		return;
+		return 0;
 
 	vnic = &bp->vnic_info[BNXT_VNIC_DEFAULT];
 	mask = vnic->rx_mask;
@@ -13718,6 +13718,8 @@ static void bnxt_set_rx_mode(struct net_device *dev,
 
 		bnxt_cfg_rx_mode(bp, uc, uc_update);
 	}
+
+	return 0;
 }
 
 static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc,
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 8b53ffb75650..29b8403a066b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -1134,10 +1134,12 @@ bool iavf_promiscuous_mode_changed(struct iavf_adapter *adapter)
  * @netdev: network interface device structure
  * @uc: snapshot of uc address list
  * @mc: snapshot of mc address list
+ *
+ * Return: 0 on success.
  **/
-static void iavf_set_rx_mode(struct net_device *netdev,
-			     struct netdev_hw_addr_list *uc,
-			     struct netdev_hw_addr_list *mc)
+static int iavf_set_rx_mode(struct net_device *netdev,
+			    struct netdev_hw_addr_list *uc,
+			    struct netdev_hw_addr_list *mc)
 {
 	struct iavf_adapter *adapter = netdev_priv(netdev);
 
@@ -1150,6 +1152,8 @@ static void iavf_set_rx_mode(struct net_device *netdev,
 	if (iavf_promiscuous_mode_changed(adapter))
 		adapter->aq_required |= IAVF_FLAG_AQ_CONFIGURE_PROMISC_MODE;
 	spin_unlock_bh(&adapter->current_netdev_promisc_flags_lock);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c5d26c6829a0..775f0c6e55c9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4142,13 +4142,15 @@ static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 }
 
-static void mlx5e_set_rx_mode(struct net_device *dev,
-			      struct netdev_hw_addr_list *uc,
-			      struct netdev_hw_addr_list *mc)
+static int mlx5e_set_rx_mode(struct net_device *dev,
+			     struct netdev_hw_addr_list *uc,
+			     struct netdev_hw_addr_list *mc)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
 	mlx5e_fs_set_rx_mode_work(priv->fs, dev, uc, mc);
+
+	return 0;
 }
 
 static int mlx5e_set_mac(struct net_device *netdev, void *addr)
diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
index f99ca551c1ce..dd77ab6052c8 100644
--- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
+++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c
@@ -240,9 +240,9 @@ void __fbnic_set_rx_mode(struct fbnic_dev *fbd,
 	fbnic_write_tce_tcam(fbd);
 }
 
-static void fbnic_set_rx_mode(struct net_device *netdev,
-			      struct netdev_hw_addr_list *uc,
-			      struct netdev_hw_addr_list *mc)
+static int fbnic_set_rx_mode(struct net_device *netdev,
+			     struct netdev_hw_addr_list *uc,
+			     struct netdev_hw_addr_list *mc)
 {
 	struct fbnic_net *fbn = netdev_priv(netdev);
 	struct fbnic_dev *fbd = fbn->fbd;
@@ -250,6 +250,8 @@ static void fbnic_set_rx_mode(struct net_device *netdev,
 	/* No need to update the hardware if we are not running */
 	if (netif_running(netdev))
 		__fbnic_set_rx_mode(fbd, uc, mc);
+
+	return 0;
 }
 
 static int fbnic_set_mac(struct net_device *netdev, void *p)
diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index a750768912b5..27e5f109f933 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -185,10 +185,11 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void nsim_set_rx_mode(struct net_device *dev,
-			     struct netdev_hw_addr_list *uc,
-			     struct netdev_hw_addr_list *mc)
+static int nsim_set_rx_mode(struct net_device *dev,
+			    struct netdev_hw_addr_list *uc,
+			    struct netdev_hw_addr_list *mc)
 {
+	return 0;
 }
 
 static int nsim_change_mtu(struct net_device *dev, int new_mtu)
diff --git a/drivers/net/netkit.c b/drivers/net/netkit.c
index 0ad6a806d7d5..a3931cd82132 100644
--- a/drivers/net/netkit.c
+++ b/drivers/net/netkit.c
@@ -186,11 +186,12 @@ static int netkit_get_iflink(const struct net_device *dev)
 	return iflink;
 }
 
-static void netkit_set_multicast(struct net_device *dev,
-				 struct netdev_hw_addr_list *uc,
-				 struct netdev_hw_addr_list *mc)
+static int netkit_set_multicast(struct net_device *dev,
+				struct netdev_hw_addr_list *uc,
+				struct netdev_hw_addr_list *mc)
 {
 	/* Nothing to do, we receive whatever gets pushed to us! */
+	return 0;
 }
 
 static int netkit_set_macaddr(struct net_device *dev, void *sa)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bf3dd9b2c1a7..07356e784282 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1122,13 +1122,14 @@ struct netdev_net_notifier {
  *	Cannot sleep, called with netif_addr_lock_bh held.
  *	Deprecated in favor of ndo_set_rx_mode_async.
  *
- * void (*ndo_set_rx_mode_async)(struct net_device *dev,
- *				 struct netdev_hw_addr_list *uc,
- *				 struct netdev_hw_addr_list *mc);
+ * int (*ndo_set_rx_mode_async)(struct net_device *dev,
+ *				struct netdev_hw_addr_list *uc,
+ *				struct netdev_hw_addr_list *mc);
  *	Async version of ndo_set_rx_mode which runs in process context
  *	with rtnl_lock and netdev_lock_ops(dev) held. The uc/mc parameters
  *	are snapshots of the address lists - iterate with
- *	netdev_hw_addr_list_for_each(ha, uc).
+ *	netdev_hw_addr_list_for_each(ha, uc). Return 0 on success or a
+ *	negative errno to request a retry via the core backoff.
  *
  * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
  *	This function  is called when the Media Access Control address
@@ -1455,7 +1456,7 @@ struct net_device_ops {
 	void			(*ndo_change_rx_flags)(struct net_device *dev,
 						       int flags);
 	void			(*ndo_set_rx_mode)(struct net_device *dev);
-	void			(*ndo_set_rx_mode_async)(
+	int			(*ndo_set_rx_mode_async)(
 					struct net_device *dev,
 					struct netdev_hw_addr_list *uc,
 					struct netdev_hw_addr_list *mc);
-- 
2.53.0-Meta


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 2/3] net: add retry mechanism to ndo_set_rx_mode_async
  2026-05-27  1:41 [PATCH net-next 0/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
  2026-05-27  1:41 ` [PATCH net-next 1/3] net: change ndo_set_rx_mode_async return type to int Stanislav Fomichev
@ 2026-05-27  1:41 ` Stanislav Fomichev
  2026-05-29  0:29   ` Jakub Kicinski
  2026-05-27  1:41 ` [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism Stanislav Fomichev
  2 siblings, 1 reply; 7+ messages in thread
From: Stanislav Fomichev @ 2026-05-27  1:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, kuba, pabeni

When ndo_set_rx_mode_async returns an error, schedule a retry with
exponential backoff (1s, 2s, 4s, 8s — 15s total). Give up after the
4th retry and log an error via netdev_err().

This moves retry logic from individual drivers into the core stack.

Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 include/linux/netdevice.h |  5 ++++
 net/core/dev.c            |  7 +++--
 net/core/dev.h            |  2 ++
 net/core/dev_addr_lists.c | 56 +++++++++++++++++++++++++++++++++++++--
 4 files changed, 66 insertions(+), 4 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 07356e784282..ea4eb7d2cad8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1933,6 +1933,8 @@ enum netdev_reg_state {
  *	@rx_mode_node:		List entry for rx_mode work processing
  *	@rx_mode_tracker:	Refcount tracker for rx_mode work
  *	@rx_mode_addr_cache:	Recycled snapshot entries for rx_mode work
+ *	@rx_mode_retry_work:	Delayed work for failed rx_mode
+ *	@rx_mode_retry_delay:	Backoff delay for the next rx_mode attempt
  *	@uc:			unicast mac addresses
  *	@mc:			multicast mac addresses
  *	@dev_addrs:		list of device hw addresses
@@ -2327,6 +2329,8 @@ struct net_device {
 	struct list_head	rx_mode_node;
 	netdevice_tracker	rx_mode_tracker;
 	struct netdev_hw_addr_list	rx_mode_addr_cache;
+	struct delayed_work	rx_mode_retry_work;
+	unsigned long		rx_mode_retry_delay;
 #ifdef CONFIG_LOCKDEP
 	unsigned char		nested_level;
 #endif
@@ -5150,6 +5154,7 @@ static inline void __dev_mc_unsync(struct net_device *dev,
 
 /* Functions used for secondary unicast and multicast support */
 void dev_set_rx_mode(struct net_device *dev);
+void netif_rx_mode_schedule_retry(struct net_device *dev);
 int netif_set_promiscuity(struct net_device *dev, int inc);
 int dev_set_promiscuity(struct net_device *dev, int inc);
 int netif_set_allmulti(struct net_device *dev, int inc, bool notify);
diff --git a/net/core/dev.c b/net/core/dev.c
index 26ac8eb9b259..751d6b5c4e23 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1775,6 +1775,8 @@ static void __dev_close_many(struct list_head *head)
 		if (ops->ndo_stop)
 			ops->ndo_stop(dev);
 
+		netif_rx_mode_cancel_retry(dev);
+
 		netif_set_up(dev, false);
 		netpoll_poll_enable(dev);
 	}
@@ -11720,6 +11722,8 @@ void netdev_run_todo(void)
 			continue;
 		}
 
+		if (cancel_delayed_work_sync(&dev->rx_mode_retry_work))
+			dev_put(dev);
 		netdev_lock(dev);
 		WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
 		netdev_unlock(dev);
@@ -12100,8 +12104,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 #endif
 
 	mutex_init(&dev->lock);
-	INIT_LIST_HEAD(&dev->rx_mode_node);
-	__hw_addr_init(&dev->rx_mode_addr_cache);
+	netif_rx_mode_init(dev);
 
 	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
 	setup(dev);
diff --git a/net/core/dev.h b/net/core/dev.h
index 0cf24b8f5008..4352f39c8dfe 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -166,8 +166,10 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier);
 
 void __dev_set_rx_mode(struct net_device *dev);
 int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify);
+void netif_rx_mode_init(struct net_device *dev);
 bool netif_rx_mode_clean(struct net_device *dev);
 void netif_rx_mode_sync(struct net_device *dev);
+void netif_rx_mode_cancel_retry(struct net_device *dev);
 
 void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
 			unsigned int gchanges, u32 portid,
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index d73fcb0c6785..c02aa05c9975 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -23,6 +23,24 @@ static LIST_HEAD(rx_mode_list);
 static DEFINE_SPINLOCK(rx_mode_lock);
 static DECLARE_WORK(rx_mode_work, netdev_rx_mode_work);
 
+static void netif_rx_mode_queue(struct net_device *dev);
+
+static void netif_rx_mode_retry(struct work_struct *work)
+{
+	struct net_device *dev = container_of(work, struct net_device,
+					      rx_mode_retry_work.work);
+
+	netif_rx_mode_queue(dev);
+	dev_put(dev);
+}
+
+void netif_rx_mode_init(struct net_device *dev)
+{
+	INIT_LIST_HEAD(&dev->rx_mode_node);
+	__hw_addr_init(&dev->rx_mode_addr_cache);
+	INIT_DELAYED_WORK(&dev->rx_mode_retry_work, netif_rx_mode_retry);
+}
+
 /*
  * General list handling functions
  */
@@ -1252,6 +1270,35 @@ static int netif_uc_promisc_update(struct net_device *dev)
 	return 0;
 }
 
+void netif_rx_mode_schedule_retry(struct net_device *dev)
+{
+	unsigned long delay;
+
+	/* Total retry budget: 1+2+4+8 = 15 seconds. */
+	if (dev->rx_mode_retry_delay >= 8 * HZ) {
+		netdev_err(dev, "rx_mode retry limit reached, giving up\n");
+		return;
+	}
+
+	delay = dev->rx_mode_retry_delay ? dev->rx_mode_retry_delay * 2 : HZ;
+	dev_hold(dev);
+	if (!schedule_delayed_work(&dev->rx_mode_retry_work, delay)) {
+		dev_put(dev);
+		return;
+	}
+	if (!dev->rx_mode_retry_delay)
+		netdev_info(dev, "rx_mode install failed, retrying with backoff\n");
+	dev->rx_mode_retry_delay = delay;
+}
+EXPORT_SYMBOL_GPL(netif_rx_mode_schedule_retry);
+
+void netif_rx_mode_cancel_retry(struct net_device *dev)
+{
+	if (cancel_delayed_work(&dev->rx_mode_retry_work))
+		dev_put(dev);
+	dev->rx_mode_retry_delay = 0;
+}
+
 static void netif_rx_mode_run(struct net_device *dev)
 {
 	struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref;
@@ -1275,8 +1322,8 @@ static void netif_rx_mode_run(struct net_device *dev)
 		err = netif_addr_lists_snapshot(dev, &uc_snap, &mc_snap,
 						&uc_ref, &mc_ref);
 		if (err) {
-			netdev_WARN(dev, "failed to sync uc/mc addresses\n");
 			netif_addr_unlock_bh(dev);
+			netif_rx_mode_schedule_retry(dev);
 			return;
 		}
 
@@ -1292,12 +1339,17 @@ static void netif_rx_mode_run(struct net_device *dev)
 		__dev_set_promiscuity(dev, promisc_inc, false);
 
 	if (ops->ndo_set_rx_mode_async) {
-		ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap);
+		err = ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap);
 
 		netif_addr_lock_bh(dev);
 		netif_addr_lists_reconcile(dev, &uc_snap, &mc_snap,
 					   &uc_ref, &mc_ref);
 		netif_addr_unlock_bh(dev);
+
+		if (err)
+			netif_rx_mode_schedule_retry(dev);
+		else
+			netif_rx_mode_cancel_retry(dev);
 	} else if (ops->ndo_set_rx_mode) {
 		netif_addr_lock_bh(dev);
 		ops->ndo_set_rx_mode(dev);
-- 
2.53.0-Meta


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism
  2026-05-27  1:41 [PATCH net-next 0/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
  2026-05-27  1:41 ` [PATCH net-next 1/3] net: change ndo_set_rx_mode_async return type to int Stanislav Fomichev
  2026-05-27  1:41 ` [PATCH net-next 2/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
@ 2026-05-27  1:41 ` Stanislav Fomichev
  2026-05-27 17:48   ` Michael Chan
  2 siblings, 1 reply; 7+ messages in thread
From: Stanislav Fomichev @ 2026-05-27  1:41 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, kuba, pabeni, Michael Chan, Pavan Chebbi

Remove the driver-specific BNXT_STATE_L2_FILTER_RETRY + timer + sp_task
retry mechanism and rely on the core stack's ndo_set_rx_mode_async retry
instead.

bnxt_cfg_rx_mode() now returns errors (including -ENODEV when the VF
cannot reach the PF), allowing the core stack to schedule retries with
exponential backoff.

Call core's netif_rx_mode_schedule_retry during reset to handle
VF reopen.

Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Pavan Chebbi <pavan.chebbi@broadcom.com>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 ++++++++---------------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  2 --
 2 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8459a21cf0a9..3186fdaa1fd0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -11232,8 +11232,14 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init)
 	}
 
 	rc = bnxt_cfg_rx_mode(bp, &bp->dev->uc, true);
-	if (rc)
-		goto err_out;
+	if (rc) {
+		if (BNXT_VF(bp) && rc == -ENODEV) {
+			netif_rx_mode_schedule_retry(bp->dev);
+			rc = 0;
+		} else {
+			goto err_out;
+		}
+	}
 
 skip_rx_mask:
 	rc = bnxt_hwrm_set_coal(bp);
@@ -13716,7 +13722,7 @@ static int bnxt_set_rx_mode(struct net_device *dev,
 	if (mask != vnic->rx_mask || uc_update || mc_update) {
 		vnic->rx_mask = mask;
 
-		bnxt_cfg_rx_mode(bp, uc, uc_update);
+		return bnxt_cfg_rx_mode(bp, uc, uc_update);
 	}
 
 	return 0;
@@ -13757,21 +13763,14 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc,
 	for (i = 1, off = 0; i < vnic->uc_filter_count; i++, off += ETH_ALEN) {
 		rc = bnxt_hwrm_set_vnic_filter(bp, 0, i, vnic->uc_list + off);
 		if (rc) {
-			if (BNXT_VF(bp) && rc == -ENODEV) {
-				if (!test_and_set_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
-					netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n");
-				else
-					netdev_dbg(bp->dev, "PF still unavailable while configuring L2 filters.\n");
-				rc = 0;
-			} else {
+			if (BNXT_VF(bp) && rc == -ENODEV)
+				netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n");
+			else
 				netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc);
-			}
 			vnic->uc_filter_count = i;
 			return rc;
 		}
 	}
-	if (test_and_clear_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
-		netdev_notice(bp->dev, "Retry of L2 filter configuration successful.\n");
 
 skip_uc:
 	if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) &&
@@ -14362,9 +14361,6 @@ static void bnxt_timer(struct timer_list *t)
 		}
 	}
 
-	if (test_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
-		bnxt_queue_sp_work(bp, BNXT_RX_MASK_SP_EVENT);
-
 	if ((BNXT_CHIP_P5(bp)) && !bp->chip_rev && netif_carrier_ok(dev))
 		bnxt_queue_sp_work(bp, BNXT_RING_COAL_NOW_SP_EVENT);
 
@@ -14712,7 +14708,6 @@ static void bnxt_ulp_restart(struct bnxt *bp)
 static void bnxt_sp_task(struct work_struct *work)
 {
 	struct bnxt *bp = container_of(work, struct bnxt, sp_task);
-	struct net_device *dev = bp->dev;
 
 	set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
 	smp_mb__after_atomic();
@@ -14790,13 +14785,6 @@ static void bnxt_sp_task(struct work_struct *work)
 	/* These functions below will clear BNXT_STATE_IN_SP_TASK.  They
 	 * must be the last functions to be called before exiting.
 	 */
-	if (test_and_clear_bit(BNXT_RX_MASK_SP_EVENT, &bp->sp_event)) {
-		bnxt_lock_sp(bp);
-		if (test_bit(BNXT_STATE_OPEN, &bp->state))
-			bnxt_cfg_rx_mode(bp, &dev->uc, true);
-		bnxt_unlock_sp(bp);
-	}
-
 	if (test_and_clear_bit(BNXT_RESET_TASK_SP_EVENT, &bp->sp_event))
 		bnxt_reset(bp, false);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 61c847b36b9f..c089afdb186d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2467,7 +2467,6 @@ struct bnxt {
 #define BNXT_STATE_DRV_REGISTERED	7
 #define BNXT_STATE_PCI_CHANNEL_IO_FROZEN	8
 #define BNXT_STATE_NAPI_DISABLED	9
-#define BNXT_STATE_L2_FILTER_RETRY	10
 #define BNXT_STATE_FW_ACTIVATE		11
 #define BNXT_STATE_RECOVER		12
 #define BNXT_STATE_FW_NON_FATAL_COND	13
@@ -2622,7 +2621,6 @@ struct bnxt {
 
 	struct work_struct	sp_task;
 	unsigned long		sp_event;
-#define BNXT_RX_MASK_SP_EVENT		0
 #define BNXT_RX_NTP_FLTR_SP_EVENT	1
 #define BNXT_LINK_CHNG_SP_EVENT		2
 #define BNXT_HWRM_EXEC_FWD_REQ_SP_EVENT	3
-- 
2.53.0-Meta


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism
  2026-05-27  1:41 ` [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism Stanislav Fomichev
@ 2026-05-27 17:48   ` Michael Chan
  2026-05-28 21:30     ` Stanislav Fomichev
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Chan @ 2026-05-27 17:48 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, edumazet, kuba, pabeni, Pavan Chebbi

[-- Attachment #1: Type: text/plain, Size: 2356 bytes --]

On Tue, May 26, 2026 at 6:41 PM Stanislav Fomichev <sdf.kernel@gmail.com> wrote:
>
> Remove the driver-specific BNXT_STATE_L2_FILTER_RETRY + timer + sp_task
> retry mechanism and rely on the core stack's ndo_set_rx_mode_async retry
> instead.
>
> bnxt_cfg_rx_mode() now returns errors (including -ENODEV when the VF
> cannot reach the PF), allowing the core stack to schedule retries with
> exponential backoff.
>
> Call core's netif_rx_mode_schedule_retry during reset to handle
> VF reopen.
>
> Cc: Michael Chan <michael.chan@broadcom.com>
> Cc: Pavan Chebbi <pavan.chebbi@broadcom.com>
> Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> ---
>  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 ++++++++---------------
>  drivers/net/ethernet/broadcom/bnxt/bnxt.h |  2 --
>  2 files changed, 12 insertions(+), 26 deletions(-)
>
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c

> @@ -13757,21 +13763,14 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc,
>         for (i = 1, off = 0; i < vnic->uc_filter_count; i++, off += ETH_ALEN) {
>                 rc = bnxt_hwrm_set_vnic_filter(bp, 0, i, vnic->uc_list + off);
>                 if (rc) {
> -                       if (BNXT_VF(bp) && rc == -ENODEV) {
> -                               if (!test_and_set_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
> -                                       netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n");
> -                               else
> -                                       netdev_dbg(bp->dev, "PF still unavailable while configuring L2 filters.\n");
> -                               rc = 0;
> -                       } else {
> +                       if (BNXT_VF(bp) && rc == -ENODEV)
> +                               netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n");

Maybe we can set rc to -EAGAIN here so it's easier for the caller to
check for retry.

> +                       else
>                                 netdev_err(bp->dev, "HWRM vnic filter failure rc: %x\n", rc);
> -                       }
>                         vnic->uc_filter_count = i;
>                         return rc;
>                 }
>         }

[-- Attachment #2: S/MIME Cryptographic Signature --]
[-- Type: application/pkcs7-signature, Size: 5469 bytes --]

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism
  2026-05-27 17:48   ` Michael Chan
@ 2026-05-28 21:30     ` Stanislav Fomichev
  0 siblings, 0 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2026-05-28 21:30 UTC (permalink / raw)
  To: Michael Chan; +Cc: netdev, davem, edumazet, kuba, pabeni, Pavan Chebbi

On 05/27, Michael Chan wrote:
> On Tue, May 26, 2026 at 6:41 PM Stanislav Fomichev <sdf.kernel@gmail.com> wrote:
> >
> > Remove the driver-specific BNXT_STATE_L2_FILTER_RETRY + timer + sp_task
> > retry mechanism and rely on the core stack's ndo_set_rx_mode_async retry
> > instead.
> >
> > bnxt_cfg_rx_mode() now returns errors (including -ENODEV when the VF
> > cannot reach the PF), allowing the core stack to schedule retries with
> > exponential backoff.
> >
> > Call core's netif_rx_mode_schedule_retry during reset to handle
> > VF reopen.
> >
> > Cc: Michael Chan <michael.chan@broadcom.com>
> > Cc: Pavan Chebbi <pavan.chebbi@broadcom.com>
> > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > ---
> >  drivers/net/ethernet/broadcom/bnxt/bnxt.c | 36 ++++++++---------------
> >  drivers/net/ethernet/broadcom/bnxt/bnxt.h |  2 --
> >  2 files changed, 12 insertions(+), 26 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
> 
> > @@ -13757,21 +13763,14 @@ static int bnxt_cfg_rx_mode(struct bnxt *bp, struct netdev_hw_addr_list *uc,
> >         for (i = 1, off = 0; i < vnic->uc_filter_count; i++, off += ETH_ALEN) {
> >                 rc = bnxt_hwrm_set_vnic_filter(bp, 0, i, vnic->uc_list + off);
> >                 if (rc) {
> > -                       if (BNXT_VF(bp) && rc == -ENODEV) {
> > -                               if (!test_and_set_bit(BNXT_STATE_L2_FILTER_RETRY, &bp->state))
> > -                                       netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n");
> > -                               else
> > -                                       netdev_dbg(bp->dev, "PF still unavailable while configuring L2 filters.\n");
> > -                               rc = 0;
> > -                       } else {
> > +                       if (BNXT_VF(bp) && rc == -ENODEV)
> > +                               netdev_warn(bp->dev, "Cannot configure L2 filters while PF is unavailable, will retry\n");
> 
> Maybe we can set rc to -EAGAIN here so it's easier for the caller to
> check for retry.

Yeah, that makes sense, will do, thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net-next 2/3] net: add retry mechanism to ndo_set_rx_mode_async
  2026-05-27  1:41 ` [PATCH net-next 2/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
@ 2026-05-29  0:29   ` Jakub Kicinski
  0 siblings, 0 replies; 7+ messages in thread
From: Jakub Kicinski @ 2026-05-29  0:29 UTC (permalink / raw)
  To: Stanislav Fomichev; +Cc: netdev, davem, edumazet, pabeni

On Tue, 26 May 2026 18:41:16 -0700 Stanislav Fomichev wrote:
> When ndo_set_rx_mode_async returns an error, schedule a retry with
> exponential backoff (1s, 2s, 4s, 8s — 15s total). Give up after the

Em-dash detected. Issuing a brown bag and initiating shaming procedures.

> 4th retry and log an error via netdev_err().
> 
> This moves retry logic from individual drivers into the core stack.


> @@ -2327,6 +2329,8 @@ struct net_device {
>  	struct list_head	rx_mode_node;
>  	netdevice_tracker	rx_mode_tracker;
>  	struct netdev_hw_addr_list	rx_mode_addr_cache;
> +	struct delayed_work	rx_mode_retry_work;

Either replace the existing work or only add a timer?
Having two work structs feels odd.

> +	unsigned long		rx_mode_retry_delay;

int should be plenty bits for this.
Actually for clarity maybe it's better to keep a counter here?
easy enough to (base << dev->bla_retry_counter) at scheduling
time and that way we don't have to remember what the init value
should be. Retry counter resetting to 0 is quite obvious, and
so is the cap value.

>  #ifdef CONFIG_LOCKDEP
>  	unsigned char		nested_level;
>  #endif
> @@ -5150,6 +5154,7 @@ static inline void __dev_mc_unsync(struct net_device *dev,
>  
>  /* Functions used for secondary unicast and multicast support */
>  void dev_set_rx_mode(struct net_device *dev);
> +void netif_rx_mode_schedule_retry(struct net_device *dev);
>  int netif_set_promiscuity(struct net_device *dev, int inc);
>  int dev_set_promiscuity(struct net_device *dev, int inc);
>  int netif_set_allmulti(struct net_device *dev, int inc, bool notify);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 26ac8eb9b259..751d6b5c4e23 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1775,6 +1775,8 @@ static void __dev_close_many(struct list_head *head)
>  		if (ops->ndo_stop)
>  			ops->ndo_stop(dev);
>  
> +		netif_rx_mode_cancel_retry(dev);
> +
>  		netif_set_up(dev, false);
>  		netpoll_poll_enable(dev);
>  	}
> @@ -11720,6 +11722,8 @@ void netdev_run_todo(void)
>  			continue;
>  		}
>  
> +		if (cancel_delayed_work_sync(&dev->rx_mode_retry_work))
> +			dev_put(dev);

trackers required these days
Perhaps you can avoid the reference if you use disable_.._sync() ?

>  		netdev_lock(dev);
>  		WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
>  		netdev_unlock(dev);
> @@ -12100,8 +12104,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
>  #endif
>  
>  	mutex_init(&dev->lock);
> -	INIT_LIST_HEAD(&dev->rx_mode_node);
> -	__hw_addr_init(&dev->rx_mode_addr_cache);
> +	netif_rx_mode_init(dev);
>  
>  	dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
>  	setup(dev);
> diff --git a/net/core/dev.h b/net/core/dev.h
> index 0cf24b8f5008..4352f39c8dfe 100644
> --- a/net/core/dev.h
> +++ b/net/core/dev.h
> @@ -166,8 +166,10 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier);
>  
>  void __dev_set_rx_mode(struct net_device *dev);
>  int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify);
> +void netif_rx_mode_init(struct net_device *dev);
>  bool netif_rx_mode_clean(struct net_device *dev);
>  void netif_rx_mode_sync(struct net_device *dev);
> +void netif_rx_mode_cancel_retry(struct net_device *dev);
>  
>  void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
>  			unsigned int gchanges, u32 portid,
> diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
> index d73fcb0c6785..c02aa05c9975 100644
> --- a/net/core/dev_addr_lists.c
> +++ b/net/core/dev_addr_lists.c
> @@ -23,6 +23,24 @@ static LIST_HEAD(rx_mode_list);
>  static DEFINE_SPINLOCK(rx_mode_lock);
>  static DECLARE_WORK(rx_mode_work, netdev_rx_mode_work);
>  
> +static void netif_rx_mode_queue(struct net_device *dev);

No fwd declarations please :/

> +static void netif_rx_mode_retry(struct work_struct *work)
> +{
> +	struct net_device *dev = container_of(work, struct net_device,
> +					      rx_mode_retry_work.work);
> +
> +	netif_rx_mode_queue(dev);
> +	dev_put(dev);
> +}
> +
> +void netif_rx_mode_init(struct net_device *dev)
> +{
> +	INIT_LIST_HEAD(&dev->rx_mode_node);
> +	__hw_addr_init(&dev->rx_mode_addr_cache);
> +	INIT_DELAYED_WORK(&dev->rx_mode_retry_work, netif_rx_mode_retry);
> +}
> +
>  /*
>   * General list handling functions
>   */
> @@ -1252,6 +1270,35 @@ static int netif_uc_promisc_update(struct net_device *dev)
>  	return 0;
>  }
>  
> +void netif_rx_mode_schedule_retry(struct net_device *dev)
> +{
> +	unsigned long delay;
> +
> +	/* Total retry budget: 1+2+4+8 = 15 seconds. */

What is the value of this comment..?

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2026-05-29  0:29 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-27  1:41 [PATCH net-next 0/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
2026-05-27  1:41 ` [PATCH net-next 1/3] net: change ndo_set_rx_mode_async return type to int Stanislav Fomichev
2026-05-27  1:41 ` [PATCH net-next 2/3] net: add retry mechanism to ndo_set_rx_mode_async Stanislav Fomichev
2026-05-29  0:29   ` Jakub Kicinski
2026-05-27  1:41 ` [PATCH net-next 3/3] bnxt: convert to core rx_mode retry mechanism Stanislav Fomichev
2026-05-27 17:48   ` Michael Chan
2026-05-28 21:30     ` Stanislav Fomichev

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox