From: Jakub Kicinski <kuba@kernel.org>
To: Stanislav Fomichev <sdf@fomichev.me>
Cc: netdev@vger.kernel.org, davem@davemloft.net, edumazet@google.com,
pabeni@redhat.com, horms@kernel.org, corbet@lwn.net,
skhan@linuxfoundation.org, andrew+netdev@lunn.ch,
michael.chan@broadcom.com, pavan.chebbi@broadcom.com,
anthony.l.nguyen@intel.com, przemyslaw.kitszel@intel.com,
saeedm@nvidia.com, tariqt@nvidia.com, mbloch@nvidia.com,
alexanderduyck@fb.com, kernel-team@meta.com,
johannes@sipsolutions.net, sd@queasysnail.net,
jianbol@nvidia.com, dtatulea@nvidia.com, mohsin.bashr@gmail.com,
jacob.e.keller@intel.com, willemb@google.com,
skhawaja@google.com, bestswngs@gmail.com,
aleksandr.loktionov@intel.com, kees@kernel.org,
linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
intel-wired-lan@lists.osuosl.org, linux-rdma@vger.kernel.org,
linux-wireless@vger.kernel.org, linux-kselftest@vger.kernel.org,
leon@kernel.org
Subject: Re: [PATCH net-next v3 03/13] net: introduce ndo_set_rx_mode_async and dev_rx_mode_work
Date: Mon, 23 Mar 2026 16:20:03 -0700 [thread overview]
Message-ID: <20260323162003.0d155055@kernel.org> (raw)
In-Reply-To: <20260320012501.2033548-4-sdf@fomichev.me>
On Thu, 19 Mar 2026 18:24:51 -0700 Stanislav Fomichev wrote:
> diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
> index 35704d115312..dc83d78d3b27 100644
> --- a/Documentation/networking/netdevices.rst
> +++ b/Documentation/networking/netdevices.rst
> @@ -289,6 +289,14 @@ struct net_device synchronization rules
> ndo_set_rx_mode:
> Synchronization: netif_addr_lock spinlock.
> Context: BHs disabled
> + Notes: Deprecated in favor of sleepable ndo_set_rx_mode_async.
>
> +ndo_set_rx_mode_async:
> + Synchronization: rtnl_lock() semaphore. In addition, netdev instance
> + lock if the driver implements queue management or shaper API.
> + Context: process (from a work queue)
> + Notes: Sleepable version of ndo_set_rx_mode. Receives snapshots
It's probably just my weirdness but I find creating adjectives out
of random nouns by adding "-able" to be in poor taste. "sleepable"
took root in certain three letter subsystems but I hope it won't
in netdev.
Please use your words:
Notes: Async version of ndo_set_rx_mode which runs in process
context. Receives snapshots f the unicast and multicast address lists.
> + of the unicast and multicast address lists.
>
> ndo_setup_tc:
> ``TC_SETUP_BLOCK`` and ``TC_SETUP_FT`` are running under NFT locks
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 469b7cdb3237..b05bdd67b807 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1117,6 +1117,16 @@ struct netdev_net_notifier {
> * This function is called device changes address list filtering.
> * If driver handles unicast address filtering, it should set
> * IFF_UNICAST_FLT in its priv_flags.
> + * Cannot sleep, called with netif_addr_lock_bh held.
> + * Deprecated in favor of sleepable ndo_set_rx_mode_async.
> + *
> + * void (*ndo_set_rx_mode_async)(struct net_device *dev,
> + * struct netdev_hw_addr_list *uc,
> + * struct netdev_hw_addr_list *mc);
> + * Sleepable version of ndo_set_rx_mode. Called from a work queue
> + * with rtnl_lock and netdev_lock_ops(dev) held. The uc/mc parameters
> + * are snapshots of the address lists - iterate with
> + * netdev_hw_addr_list_for_each(ha, uc).
> *
> * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
> * This function is called when the Media Access Control address
> @@ -1437,6 +1447,9 @@ struct net_device_ops {
> void (*ndo_change_rx_flags)(struct net_device *dev,
> int flags);
> void (*ndo_set_rx_mode)(struct net_device *dev);
> + void (*ndo_set_rx_mode_async)(struct net_device *dev,
> + struct netdev_hw_addr_list *uc,
> + struct netdev_hw_addr_list *mc);
> int (*ndo_set_mac_address)(struct net_device *dev,
> void *addr);
> int (*ndo_validate_addr)(struct net_device *dev);
> @@ -1903,6 +1916,7 @@ enum netdev_reg_state {
> * has been enabled due to the need to listen to
> * additional unicast addresses in a device that
> * does not implement ndo_set_rx_mode()
> + * @rx_mode_work: Work queue entry for ndo_set_rx_mode_async()
> * @uc: unicast mac addresses
> * @mc: multicast mac addresses
> * @dev_addrs: list of device hw addresses
> @@ -2293,6 +2307,7 @@ struct net_device {
> unsigned int promiscuity;
> unsigned int allmulti;
> bool uc_promisc;
> + struct work_struct rx_mode_work;
> #ifdef CONFIG_LOCKDEP
> unsigned char nested_level;
> #endif
> @@ -4661,6 +4676,11 @@ static inline bool netif_device_present(const struct net_device *dev)
> return test_bit(__LINK_STATE_PRESENT, &dev->state);
> }
>
> +static inline bool netif_up_and_present(const struct net_device *dev)
> +{
> + return (dev->flags & IFF_UP) && netif_device_present(dev);
Is this really worth a dedicated helper? What are you trying to express
here semantically?
> +
> void netif_device_detach(struct net_device *dev);
>
> void netif_device_attach(struct net_device *dev);
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 200d44883fc1..fedc423306fc 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2381,6 +2381,8 @@ static void netstamp_clear(struct work_struct *work)
> static DECLARE_WORK(netstamp_work, netstamp_clear);
> #endif
>
> +static struct workqueue_struct *rx_mode_wq;
> +
> void net_enable_timestamp(void)
> {
> #ifdef CONFIG_JUMP_LABEL
> @@ -9669,22 +9671,84 @@ int netif_set_allmulti(struct net_device *dev, int inc, bool notify)
> return 0;
> }
>
> -/*
> - * Upload unicast and multicast address lists to device and
> - * configure RX filtering. When the device doesn't support unicast
> - * filtering it is put in promiscuous mode while unicast addresses
> - * are present.
> +static void dev_rx_mode_work(struct work_struct *work)
> +{
> + struct net_device *dev = container_of(work, struct net_device,
> + rx_mode_work);
> + struct netdev_hw_addr_list uc_snap, mc_snap, uc_ref, mc_ref;
> + const struct net_device_ops *ops = dev->netdev_ops;
> + int err;
> +
> + __hw_addr_init(&uc_snap);
> + __hw_addr_init(&mc_snap);
> + __hw_addr_init(&uc_ref);
> + __hw_addr_init(&mc_ref);
> +
> + rtnl_lock();
> + netdev_lock_ops(dev);
> +
> + if (!netif_up_and_present(dev))
> + goto out;
> +
> + if (ops->ndo_set_rx_mode_async) {
How did we get here if we don't have this op?
Are you planning to plumb more code thru this work in the future?
If yes the whole rx_mode handling should be in a dedicated helper
rather than indenting most of the function.
> + netif_addr_lock_bh(dev);
> +
> + err = __hw_addr_list_snapshot(&uc_snap, &dev->uc,
> + dev->addr_len);
> + if (!err)
> + err = __hw_addr_list_snapshot(&uc_ref, &dev->uc,
> + dev->addr_len);
> + if (!err)
> + err = __hw_addr_list_snapshot(&mc_snap, &dev->mc,
> + dev->addr_len);
> + if (!err)
> + err = __hw_addr_list_snapshot(&mc_ref, &dev->mc,
> + dev->addr_len);
This doesn't get slow with a few thousands of addresses?
> + netif_addr_unlock_bh(dev);
> +
> + if (err) {
> + netdev_WARN(dev, "failed to sync uc/mc addresses\n");
> + __hw_addr_flush(&uc_snap);
> + __hw_addr_flush(&uc_ref);
> + __hw_addr_flush(&mc_snap);
> + goto out;
> + }
> +
> + ops->ndo_set_rx_mode_async(dev, &uc_snap, &mc_snap);
> +
> + netif_addr_lock_bh(dev);
> + __hw_addr_list_reconcile(&dev->uc, &uc_snap,
> + &uc_ref, dev->addr_len);
> + __hw_addr_list_reconcile(&dev->mc, &mc_snap,
> + &mc_ref, dev->addr_len);
> + netif_addr_unlock_bh(dev);
> + }
> +
> +out:
> + netdev_unlock_ops(dev);
> + rtnl_unlock();
> +}
> +
> +/**
> + * __dev_set_rx_mode() - upload unicast and multicast address lists to device
> + * and configure RX filtering.
> + * @dev: device
> + *
> + * When the device doesn't support unicast filtering it is put in promiscuous
> + * mode while unicast addresses are present.
> */
> void __dev_set_rx_mode(struct net_device *dev)
> {
> const struct net_device_ops *ops = dev->netdev_ops;
>
> /* dev_open will call this function so the list will stay sane. */
> - if (!(dev->flags&IFF_UP))
> + if (!netif_up_and_present(dev))
> return;
>
> - if (!netif_device_present(dev))
> + if (ops->ndo_set_rx_mode_async) {
> + queue_work(rx_mode_wq, &dev->rx_mode_work);
> return;
> + }
>
> if (!(dev->priv_flags & IFF_UNICAST_FLT)) {
> /* Unicast addresses changes may only happen under the rtnl,
> @@ -11708,6 +11772,16 @@ void netdev_run_todo(void)
>
> __rtnl_unlock();
>
> + /* Make sure all pending rx_mode work completes before returning.
> + *
> + * rx_mode_wq may be NULL during early boot:
> + * core_initcall(netlink_proto_init) vs subsys_initcall(net_dev_init).
> + *
> + * Check current_work() to avoid flushing from the wq.
> + */
> + if (rx_mode_wq && !current_work())
> + flush_workqueue(rx_mode_wq);
Can we give the work a reference on the netdev (at init time) and
cancel + release it here instead of flushing / waiting?
> /* Wait for rcu callbacks to finish before next phase */
> if (!list_empty(&list))
> rcu_barrier();
> @@ -12099,6 +12173,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
> #endif
>
> mutex_init(&dev->lock);
> + INIT_WORK(&dev->rx_mode_work, dev_rx_mode_work);
>
> dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
> setup(dev);
> @@ -12203,6 +12278,8 @@ void free_netdev(struct net_device *dev)
>
> kfree(rcu_dereference_protected(dev->ingress_queue, 1));
>
> + cancel_work_sync(&dev->rx_mode_work);
Should never happen so maybe wrap it in a WARN ?
> /* Flush device addresses */
> dev_addr_flush(dev);
>
> @@ -13296,6 +13373,10 @@ static int __init net_dev_init(void)
> if (register_pernet_device(&default_device_ops))
> goto out;
>
> + rx_mode_wq = alloc_ordered_workqueue("rx_mode_wq", 0);
> + if (!rx_mode_wq)
> + goto out;
> +
> open_softirq(NET_TX_SOFTIRQ, net_tx_action);
> open_softirq(NET_RX_SOFTIRQ, net_rx_action);
>
next prev parent reply other threads:[~2026-03-23 23:20 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-03-20 1:24 [PATCH net-next v3 00/13] net: sleepable ndo_set_rx_mode Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 01/13] net: add address list snapshot and reconciliation infrastructure Stanislav Fomichev
2026-03-23 23:20 ` Jakub Kicinski
2026-03-24 18:13 ` Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 02/13] wifi: cfg80211: use __rtnl_unlock in nl80211_pre_doit Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 03/13] net: introduce ndo_set_rx_mode_async and dev_rx_mode_work Stanislav Fomichev
2026-03-20 7:13 ` [Intel-wired-lan] " Loktionov, Aleksandr
2026-03-20 15:49 ` Stanislav Fomichev
2026-03-23 23:20 ` Jakub Kicinski [this message]
2026-03-24 18:13 ` Stanislav Fomichev
2026-03-24 21:21 ` Jakub Kicinski
2026-03-24 22:49 ` Stanislav Fomichev
2026-03-25 3:44 ` Jakub Kicinski
2026-03-25 15:06 ` Stanislav Fomichev
2026-03-25 17:34 ` Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 04/13] net: move promiscuity handling into dev_rx_mode_work Stanislav Fomichev
2026-03-20 8:01 ` Loktionov, Aleksandr
2026-03-20 15:41 ` Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 05/13] fbnic: convert to ndo_set_rx_mode_async Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 06/13] mlx5: " Stanislav Fomichev
2026-03-20 14:47 ` Cosmin Ratiu
2026-03-20 15:42 ` Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 07/13] bnxt: " Stanislav Fomichev
2026-03-24 0:47 ` Michael Chan
2026-03-20 1:24 ` [PATCH net-next v3 08/13] bnxt: use snapshot in bnxt_cfg_rx_mode Stanislav Fomichev
2026-03-20 7:51 ` Loktionov, Aleksandr
2026-03-24 1:08 ` Michael Chan
2026-03-24 18:29 ` Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 09/13] iavf: convert to ndo_set_rx_mode_async Stanislav Fomichev
2026-03-20 7:53 ` Loktionov, Aleksandr
2026-03-20 1:24 ` [PATCH net-next v3 10/13] netdevsim: " Stanislav Fomichev
2026-03-20 1:24 ` [PATCH net-next v3 11/13] dummy: " Stanislav Fomichev
2026-03-20 7:54 ` Loktionov, Aleksandr
2026-03-20 1:25 ` [PATCH net-next v3 12/13] net: warn ops-locked drivers still using ndo_set_rx_mode Stanislav Fomichev
2026-03-20 7:55 ` Loktionov, Aleksandr
2026-03-20 1:25 ` [PATCH net-next v3 13/13] selftests: net: add team_bridge_macvlan rx_mode test Stanislav Fomichev
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260323162003.0d155055@kernel.org \
--to=kuba@kernel.org \
--cc=aleksandr.loktionov@intel.com \
--cc=alexanderduyck@fb.com \
--cc=andrew+netdev@lunn.ch \
--cc=anthony.l.nguyen@intel.com \
--cc=bestswngs@gmail.com \
--cc=corbet@lwn.net \
--cc=davem@davemloft.net \
--cc=dtatulea@nvidia.com \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=intel-wired-lan@lists.osuosl.org \
--cc=jacob.e.keller@intel.com \
--cc=jianbol@nvidia.com \
--cc=johannes@sipsolutions.net \
--cc=kees@kernel.org \
--cc=kernel-team@meta.com \
--cc=leon@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=linux-wireless@vger.kernel.org \
--cc=mbloch@nvidia.com \
--cc=michael.chan@broadcom.com \
--cc=mohsin.bashr@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pavan.chebbi@broadcom.com \
--cc=przemyslaw.kitszel@intel.com \
--cc=saeedm@nvidia.com \
--cc=sd@queasysnail.net \
--cc=sdf@fomichev.me \
--cc=skhan@linuxfoundation.org \
--cc=skhawaja@google.com \
--cc=tariqt@nvidia.com \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox