Netdev List
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: davem@davemloft.net
Cc: netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com,
	andrew+netdev@lunn.ch, horms@kernel.org, jv@jvosburgh.net,
	sdf@fomichev.me, dongchenchen2@huawei.com, idosch@nvidia.com,
	n05ec@lzu.edu.cn, yuantan098@gmail.com, kuniyu@google.com,
	nb@tipi-net.de, aleksandr.loktionov@intel.com,
	dtatulea@nvidia.com, Jakub Kicinski <kuba@kernel.org>
Subject: [PATCH net 2/4] net: add the driver-facing netdev_work scheduling API
Date: Wed, 24 Jun 2026 11:20:16 -0700	[thread overview]
Message-ID: <20260624182018.2445732-3-kuba@kernel.org> (raw)
In-Reply-To: <20260624182018.2445732-1-kuba@kernel.org>

With an extra event mask we can easily extend the netdev work
to also service driver-defined events. For advanced drivers
this is probably not a perfect match, but it makes running
deferred work easier in simple cases.

Expose the netdev_work facility to drivers. Add helpers
to schedule work and a dedicated ndo to perform the
driver-scheduled actions.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h | 11 ++++++
 net/core/netdev_work.c    | 81 ++++++++++++++++++++++++++++++---------
 2 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 732506787db3..9981d637f8b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1131,6 +1131,9 @@ struct netdev_net_notifier {
  *	netdev_hw_addr_list_for_each(ha, uc). Return 0 on success or a
  *	negative errno to request a retry via the core backoff.
  *
+ * void (*ndo_work)(struct net_device *dev, unsigned long events);
+ *	Run deferred work scheduled with netdev_work_sched(@events).
+ *
  * int (*ndo_set_mac_address)(struct net_device *dev, void *addr);
  *	This function  is called when the Media Access Control address
  *	needs to be changed. If this interface is not defined, the
@@ -1460,6 +1463,8 @@ struct net_device_ops {
 					struct net_device *dev,
 					struct netdev_hw_addr_list *uc,
 					struct netdev_hw_addr_list *mc);
+	void			(*ndo_work)(struct net_device *dev,
+					    unsigned long events);
 	int			(*ndo_set_mac_address)(struct net_device *dev,
 						       void *addr);
 	int			(*ndo_validate_addr)(struct net_device *dev);
@@ -1932,6 +1937,8 @@ enum netdev_reg_state {
  *				does not implement ndo_set_rx_mode()
  *	@work_node:		List entry for async netdev_work processing
  *	@work_tracker:		Refcount tracker for async netdev_work
+ *	@work_pending:		Driver-defined pending netdev_work, passed to
+ *				ndo_work() (see netdev_work_sched())
  *	@work_core_pending:	Core-defined pending netdev_work (NETDEV_WORK_*)
  *	@rx_mode_addr_cache:	Recycled snapshot entries for rx_mode work
  *	@rx_mode_retry_timer:	Timer that re-queues rx_mode work after failure
@@ -2329,6 +2336,7 @@ struct net_device {
 	bool			uc_promisc;
 	struct list_head	work_node;
 	netdevice_tracker	work_tracker;
+	unsigned long		work_pending;
 	unsigned long		work_core_pending;
 	struct netdev_hw_addr_list	rx_mode_addr_cache;
 	struct timer_list	rx_mode_retry_timer;
@@ -5178,6 +5186,9 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
 			   const struct pcpu_sw_netstats __percpu *netstats);
 void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);
 
+void netdev_work_sched(struct net_device *dev, unsigned long events);
+unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask);
+
 enum {
 	NESTED_SYNC_IMM_BIT,
 	NESTED_SYNC_TODO_BIT,
diff --git a/net/core/netdev_work.c b/net/core/netdev_work.c
index c121c24dc493..3109fae132ad 100644
--- a/net/core/netdev_work.c
+++ b/net/core/netdev_work.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <linux/export.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
@@ -16,32 +17,63 @@ static void netdev_work_proc(struct work_struct *work);
  *  - within the list entries (struct net_device fields):
  *	- work_node
  *	- work_tracker
+ *	- work_pending
  *	- work_core_pending
  */
 static LIST_HEAD(netdev_work_list);
 static DEFINE_SPINLOCK(netdev_work_lock);
 static DECLARE_WORK(netdev_work, netdev_work_proc);
 
-void __netdev_work_core_sched(struct net_device *dev, unsigned long event)
+static void netdev_work_enqueue(struct net_device *dev, unsigned long events,
+				unsigned long core)
 {
+	if (!events && !core)
+		return;
+
 	spin_lock_bh(&netdev_work_lock);
 	if (list_empty(&dev->work_node)) {
 		list_add_tail(&dev->work_node, &netdev_work_list);
 		netdev_hold(dev, &dev->work_tracker, GFP_ATOMIC);
 	}
-	dev->work_core_pending |= event;
+	dev->work_pending |= events;
+	dev->work_core_pending |= core;
 	spin_unlock_bh(&netdev_work_lock);
 
 	schedule_work(&netdev_work);
 }
 
+static unsigned long
+netdev_work_dequeue(struct net_device *dev, unsigned long *pending,
+		    unsigned long mask)
+{
+	unsigned long events;
+
+	spin_lock_bh(&netdev_work_lock);
+	events = *pending & mask;
+	*pending &= ~events;
+	if (!list_empty(&dev->work_node) &&
+	    !dev->work_pending && !dev->work_core_pending) {
+		list_del_init(&dev->work_node);
+		netdev_put(dev, &dev->work_tracker);
+	}
+	spin_unlock_bh(&netdev_work_lock);
+
+	return events;
+}
+
+void netdev_work_sched(struct net_device *dev, unsigned long events)
+{
+	netdev_work_enqueue(dev, events, 0);
+}
+EXPORT_SYMBOL(netdev_work_sched);
+
 /**
- * __netdev_work_core_cancel() - cancel selected core work for a netdev
+ * netdev_work_cancel() - cancel selected work for a netdev
  * @dev: net_device
  * @mask: events to cancel
  *
  * Clear @mask from the device's work pending mask. If no work is left pending
- * the device is dequeued.
+ * the device is dequeued and its ndo_work won't be called.
  *
  * No expectations on locking, but also no guarantees provided. If the caller
  * wants to touch @dev afterwards (e.g. call the work that got canceled)
@@ -50,21 +82,33 @@ void __netdev_work_core_sched(struct net_device *dev, unsigned long event)
  * Returns: the subset of @mask that was actually pending, so the caller can run
  * those events inline.
  */
+unsigned long netdev_work_cancel(struct net_device *dev, unsigned long mask)
+{
+	return netdev_work_dequeue(dev, &dev->work_pending, mask);
+}
+EXPORT_SYMBOL(netdev_work_cancel);
+
+void __netdev_work_core_sched(struct net_device *dev, unsigned long events)
+{
+	netdev_work_enqueue(dev, 0, events);
+}
+
 unsigned long
 __netdev_work_core_cancel(struct net_device *dev, unsigned long mask)
 {
-	unsigned long event;
+	return netdev_work_dequeue(dev, &dev->work_core_pending, mask);
+}
 
-	spin_lock_bh(&netdev_work_lock);
-	event = dev->work_core_pending & mask;
-	dev->work_core_pending &= ~mask;
-	if (!list_empty(&dev->work_node) && !dev->work_core_pending) {
-		list_del_init(&dev->work_node);
-		netdev_put(dev, &dev->work_tracker);
-	}
-	spin_unlock_bh(&netdev_work_lock);
+static void netdev_work_run(struct net_device *dev, unsigned long events,
+			    unsigned long core)
+{
+	if (!netif_device_present(dev))
+		return;
 
-	return event;
+	if (core & NETDEV_WORK_RX_MODE)
+		netif_rx_mode_run(dev);
+	if (events && dev->netdev_ops->ndo_work)
+		dev->netdev_ops->ndo_work(dev, events);
 }
 
 static void netdev_work_proc(struct work_struct *work)
@@ -72,9 +116,9 @@ static void netdev_work_proc(struct work_struct *work)
 	rtnl_lock();
 
 	while (true) {
+		unsigned long events = 0, core = 0;
 		netdevice_tracker tracker;
 		struct net_device *dev;
-		unsigned long core = 0;
 
 		spin_lock_bh(&netdev_work_lock);
 		if (list_empty(&netdev_work_list)) {
@@ -98,16 +142,17 @@ static void netdev_work_proc(struct work_struct *work)
 			list_del_init(&dev->work_node);
 			core = dev->work_core_pending;
 			dev->work_core_pending = 0;
+			events = dev->work_pending;
+			dev->work_pending = 0;
 			/* We took another ref above */
 			netdev_put(dev, &dev->work_tracker);
 
 			if (!dev_isalive(dev))
-				core = 0;
+				core = events = 0;
 		}
 		spin_unlock_bh(&netdev_work_lock);
 
-		if (core & NETDEV_WORK_RX_MODE)
-			netif_rx_mode_run(dev);
+		netdev_work_run(dev, events, core);
 		netdev_unlock_ops(dev);
 
 		netdev_put(dev, &tracker);
-- 
2.54.0


  parent reply	other threads:[~2026-06-24 18:20 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-24 18:20 [PATCH net 0/4] net: avoid nested UP notifier events Jakub Kicinski
2026-06-24 18:20 ` [PATCH net 1/4] net: turn the rx_mode work into a generic netdev_work facility Jakub Kicinski
2026-06-24 18:20 ` Jakub Kicinski [this message]
2026-06-24 18:20 ` [PATCH net 3/4] vlan: defer real device state propagation to netdev_work Jakub Kicinski
2026-06-24 18:20 ` [PATCH net 4/4] selftests: bonding: add a test for VLAN propagation over a bonded real device Jakub Kicinski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260624182018.2445732-3-kuba@kernel.org \
    --to=kuba@kernel.org \
    --cc=aleksandr.loktionov@intel.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=dongchenchen2@huawei.com \
    --cc=dtatulea@nvidia.com \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=idosch@nvidia.com \
    --cc=jv@jvosburgh.net \
    --cc=kuniyu@google.com \
    --cc=n05ec@lzu.edu.cn \
    --cc=nb@tipi-net.de \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=yuantan098@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox