From: Mark Bloch <mbloch@nvidia.com>
To: Jiri Pirko <jiri@resnulli.us>, Eric Dumazet <edumazet@google.com>,
"Jakub Kicinski" <kuba@kernel.org>,
Paolo Abeni <pabeni@redhat.com>, Simon Horman <horms@kernel.org>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
Leon Romanovsky <leon@kernel.org>,
Tariq Toukan <tariqt@nvidia.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
Jonathan Corbet <corbet@lwn.net>,
Shuah Khan <skhan@linuxfoundation.org>, <netdev@vger.kernel.org>,
<linux-rdma@vger.kernel.org>, <linux-doc@vger.kernel.org>,
Mark Bloch <mbloch@nvidia.com>
Subject: [PATCH net-next V4 4/6] devlink: Apply eswitch mode boot defaults
Date: Mon, 29 Jun 2026 21:20:59 +0300 [thread overview]
Message-ID: <20260629182102.245150-5-mbloch@nvidia.com> (raw)
In-Reply-To: <20260629182102.245150-1-mbloch@nvidia.com>
Apply parsed devlink_eswitch_mode= defaults after devlink registration
and after successful reload.
devl_register() may still be called before the device is ready for an
eswitch mode change, so keep a per-devlink delayed work item and pending
flag for the registration path. Registration queues the work, and the
worker tries to take the devlink instance lock.
If the lock is busy, the worker requeues itself with a delay.
For successful reloads that performed DRIVER_REINIT, devlink_reload()
already holds the devlink instance lock and the driver has completed
reload_up(). Clear the pending flag, so that any still-queued registration
work becomes a no-op, and apply the default directly from the reload path
instead of queueing work.
If a user sets eswitch mode through netlink before the pending
registration work runs, clear the pending flag so the queued default does
not override that user request. Cancel pending default apply work when
freeing the devlink instance.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
---
net/devlink/core.c | 198 +++++++++++++++++++++++++++++++-----
net/devlink/dev.c | 6 ++
net/devlink/devl_internal.h | 5 +
3 files changed, 182 insertions(+), 27 deletions(-)
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 5126509a9c4e..998e4ffd5dce 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -5,6 +5,7 @@
*/
#include <linux/init.h>
+#include <linux/jiffies.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/string.h>
@@ -22,8 +23,12 @@ DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
static char *devlink_default_esw_mode_param;
static bool devlink_default_esw_mode_match_all;
+static bool devlink_default_esw_mode_enabled;
static enum devlink_eswitch_mode devlink_default_esw_mode;
static LIST_HEAD(devlink_default_esw_mode_nodes);
+static struct workqueue_struct *devlink_default_esw_mode_wq;
+
+#define DEVLINK_DEFAULT_ESW_MODE_APPLY_DELAY msecs_to_jiffies(100)
struct devlink_default_esw_mode_node {
struct list_head list;
@@ -166,6 +171,7 @@ static void __init devlink_default_esw_mode_nodes_clear(void)
}
devlink_default_esw_mode_match_all = false;
+ devlink_default_esw_mode_enabled = false;
}
static int __init devlink_default_esw_mode_parse(char *str)
@@ -192,14 +198,113 @@ static int __init devlink_default_esw_mode_parse(char *str)
return err;
err = devlink_default_esw_mode_handles_parse(handles);
- if (err)
+ if (err) {
devlink_default_esw_mode_nodes_clear();
- else
+ } else {
devlink_default_esw_mode = esw_mode;
+ devlink_default_esw_mode_enabled = true;
+ }
return err;
}
+static bool devlink_default_esw_mode_match(struct devlink *devlink)
+{
+ const char *bus_name = devlink_bus_name(devlink);
+ const char *dev_name = devlink_dev_name(devlink);
+ struct devlink_default_esw_mode_node *node;
+
+ if (devlink_default_esw_mode_match_all)
+ return true;
+
+ node = devlink_default_esw_mode_node_find(bus_name, dev_name);
+ return !!node;
+}
+
+void devlink_default_esw_mode_apply(struct devlink *devlink)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ int err;
+
+ devl_assert_locked(devlink);
+
+ if (!devlink_default_esw_mode_match(devlink))
+ return;
+
+ if (!ops->eswitch_mode_set) {
+ if (!devlink_default_esw_mode_match_all)
+ devl_warn(devlink,
+ "devlink_eswitch_mode= selected this device but eswitch mode setting is not supported\n");
+ return;
+ }
+
+ err = devlink_eswitch_mode_set(devlink, devlink_default_esw_mode, NULL);
+ if (err)
+ devl_warn(devlink,
+ "Couldn't apply default eswitch mode, err %d\n",
+ err);
+}
+
+static void
+devlink_default_esw_mode_apply_queue(struct devlink *devlink,
+ unsigned long delay)
+{
+ if (!devlink_default_esw_mode_enabled || !devlink_default_esw_mode_wq)
+ return;
+ if (!devlink_try_get(devlink))
+ return;
+ if (!queue_delayed_work(devlink_default_esw_mode_wq,
+ &devlink->default_esw_mode_apply_dw,
+ delay))
+ devlink_put(devlink);
+}
+
+static void devlink_default_esw_mode_apply_work(struct work_struct *work)
+{
+ unsigned long delay = DEVLINK_DEFAULT_ESW_MODE_APPLY_DELAY;
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct devlink *devlink;
+
+ devlink = container_of(dwork, struct devlink,
+ default_esw_mode_apply_dw);
+ if (!devl_trylock(devlink)) {
+ if (__devl_is_registered(devlink))
+ devlink_default_esw_mode_apply_queue(devlink, delay);
+ devlink_put(devlink);
+ return;
+ }
+
+ if (devl_is_registered(devlink) &&
+ devlink->default_esw_mode_apply_pending) {
+ devlink_default_esw_mode_apply(devlink);
+ devlink->default_esw_mode_apply_pending = false;
+ }
+
+ devl_unlock(devlink);
+ devlink_put(devlink);
+}
+
+void devlink_default_esw_mode_apply_schedule(struct devlink *devlink)
+{
+ devl_assert_locked(devlink);
+
+ devlink->default_esw_mode_apply_pending = true;
+ devlink_default_esw_mode_apply_queue(devlink, 0);
+}
+
+void devlink_default_esw_mode_apply_disable(struct devlink *devlink)
+{
+ devl_assert_locked(devlink);
+
+ devlink->default_esw_mode_apply_pending = false;
+}
+
+static void devlink_default_esw_mode_apply_cancel(struct devlink *devlink)
+{
+ if (cancel_delayed_work_sync(&devlink->default_esw_mode_apply_dw))
+ devlink_put(devlink);
+}
+
static int __init devlink_default_esw_mode_setup(char *str)
{
devlink_default_esw_mode_param = str;
@@ -577,6 +682,12 @@ struct devlink *devlinks_xa_lookup_get(struct net *net, unsigned long index)
* Make @devlink visible to userspace. Drivers must call this only after the
* instance is fully initialized and its devlink operations can be called.
*
+ * If a matching devlink_eswitch_mode= default was provided on the kernel
+ * command line, devlink core schedules async work to apply it after
+ * registration. Drivers implementing eswitch_mode_set() must therefore be
+ * ready to perform the same work as a userspace eswitch mode set request from
+ * this point, including creation of representors and other eswitch state.
+ *
* Context: Caller must hold the devlink instance lock. Use devlink_register()
* when the lock is not already held.
*
@@ -590,6 +701,7 @@ int devl_register(struct devlink *devlink)
xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_notify_register(devlink);
devlink_rel_nested_in_notify(devlink);
+ devlink_default_esw_mode_apply_schedule(devlink);
return 0;
}
@@ -612,6 +724,7 @@ void devl_unregister(struct devlink *devlink)
ASSERT_DEVLINK_REGISTERED(devlink);
devl_assert_locked(devlink);
+ devlink_default_esw_mode_apply_disable(devlink);
devlink_notify_unregister(devlink);
xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
devlink_rel_put(devlink);
@@ -673,6 +786,9 @@ struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size,
INIT_LIST_HEAD(&devlink->trap_group_list);
INIT_LIST_HEAD(&devlink->trap_policer_list);
INIT_RCU_WORK(&devlink->rwork, devlink_release);
+ INIT_DELAYED_WORK(&devlink->default_esw_mode_apply_dw,
+ devlink_default_esw_mode_apply_work);
+ devlink->default_esw_mode_apply_pending = true;
lockdep_register_key(&devlink->lock_key);
mutex_init(&devlink->lock);
lockdep_set_class(&devlink->lock, &devlink->lock_key);
@@ -716,6 +832,7 @@ EXPORT_SYMBOL_GPL(devlink_alloc_ns);
void devlink_free(struct devlink *devlink)
{
ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ devlink_default_esw_mode_apply_cancel(devlink);
devlink_rel_put(devlink);
@@ -775,35 +892,59 @@ static struct notifier_block devlink_port_netdevice_nb = {
.notifier_call = devlink_port_netdevice_event,
};
-static int __init devlink_init(void)
+static int __init devlink_default_esw_mode_init(void)
{
+ char *def;
int err;
- if (devlink_default_esw_mode_param) {
- char *def;
-
- def = kstrdup(devlink_default_esw_mode_param, GFP_KERNEL);
- if (!def) {
- devlink_default_esw_mode_param = NULL;
- pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate memory\n");
- } else {
- err = devlink_default_esw_mode_parse(def);
- kfree(def);
- if (err == -EEXIST) {
- devlink_default_esw_mode_param = NULL;
- pr_warn("devlink: duplicate eswitch mode handles ignored\n");
- } else if (err == -EINVAL) {
- devlink_default_esw_mode_param = NULL;
- pr_warn("devlink: invalid devlink_eswitch_mode parameter ignored\n");
- } else if (err == -ENOMEM) {
- devlink_default_esw_mode_param = NULL;
- pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate memory\n");
- } else if (err) {
- goto out;
- }
- }
+ if (!devlink_default_esw_mode_param)
+ return 0;
+
+ def = kstrdup(devlink_default_esw_mode_param, GFP_KERNEL);
+ if (!def) {
+ devlink_default_esw_mode_param = NULL;
+ pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate memory\n");
+ return 0;
+ }
+
+ err = devlink_default_esw_mode_parse(def);
+ kfree(def);
+ if (err == -EEXIST) {
+ devlink_default_esw_mode_param = NULL;
+ pr_warn("devlink: duplicate eswitch mode handles ignored\n");
+ return 0;
+ } else if (err == -EINVAL) {
+ devlink_default_esw_mode_param = NULL;
+ pr_warn("devlink: invalid devlink_eswitch_mode parameter ignored\n");
+ return 0;
+ } else if (err == -ENOMEM) {
+ devlink_default_esw_mode_param = NULL;
+ pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate memory\n");
+ return 0;
+ } else if (err) {
+ return err;
}
+ devlink_default_esw_mode_wq = alloc_workqueue("devlink_default_esw_mode",
+ WQ_UNBOUND | WQ_MEM_RECLAIM,
+ 0);
+ if (!devlink_default_esw_mode_wq) {
+ devlink_default_esw_mode_param = NULL;
+ devlink_default_esw_mode_nodes_clear();
+ pr_warn("devlink: devlink_eswitch_mode parameter ignored, failed to allocate workqueue\n");
+ }
+
+ return 0;
+}
+
+static int __init devlink_init(void)
+{
+ int err;
+
+ err = devlink_default_esw_mode_init();
+ if (err)
+ goto out;
+
err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
@@ -819,8 +960,11 @@ static int __init devlink_init(void)
out_unreg_pernet_subsys:
unregister_pernet_subsys(&devlink_pernet_ops);
out:
- if (err)
+ if (err) {
+ if (devlink_default_esw_mode_wq)
+ destroy_workqueue(devlink_default_esw_mode_wq);
devlink_default_esw_mode_nodes_clear();
+ }
WARN_ON(err);
return err;
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index 4fb02bb993c1..7f6ed52a5f73 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -478,6 +478,11 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net,
return err;
WARN_ON(!(*actions_performed & BIT(action)));
+ if (*actions_performed & BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT)) {
+ devlink_default_esw_mode_apply_disable(devlink);
+ devlink_default_esw_mode_apply(devlink);
+ }
+
/* Catch driver on updating the remote action within devlink reload */
WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
sizeof(remote_reload_stats)));
@@ -731,6 +736,7 @@ int devlink_nl_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
u16 mode;
if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+ devlink_default_esw_mode_apply_disable(devlink);
mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
err = devlink_eswitch_mode_set(devlink, mode, info->extack);
if (err)
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index 97be77d3ed42..d6ff233da974 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -58,8 +58,10 @@ struct devlink {
struct mutex lock;
struct lock_class_key lock_key;
u8 reload_failed:1;
+ u8 default_esw_mode_apply_pending:1;
refcount_t refcount;
struct rcu_work rwork;
+ struct delayed_work default_esw_mode_apply_dw;
struct devlink_rel *rel;
struct xarray nested_rels;
char priv[] __aligned(NETDEV_ALIGN);
@@ -71,6 +73,9 @@ extern struct genl_family devlink_nl_family;
struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size,
struct net *net, struct device *dev,
const struct device_driver *dev_driver);
+void devlink_default_esw_mode_apply(struct devlink *devlink);
+void devlink_default_esw_mode_apply_schedule(struct devlink *devlink);
+void devlink_default_esw_mode_apply_disable(struct devlink *devlink);
#define devl_warn(devlink, format, args...) \
do { \
--
2.43.0
next prev parent reply other threads:[~2026-06-29 18:22 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-29 18:20 [PATCH net-next V4 0/6] devlink: Add boot-time eswitch mode defaults Mark Bloch
2026-06-29 18:20 ` [PATCH net-next V4 1/6] net/mlx5: Clear FW reset-in-progress bit before reload Mark Bloch
2026-06-29 18:20 ` [PATCH net-next V4 2/6] devlink: Factor out eswitch mode setting Mark Bloch
2026-06-29 18:20 ` [PATCH net-next V4 3/6] devlink: Parse eswitch mode boot defaults Mark Bloch
2026-06-29 18:20 ` Mark Bloch [this message]
2026-06-29 18:21 ` [PATCH net-next V4 5/6] devlink: Add API to apply eswitch mode boot default Mark Bloch
2026-06-29 18:21 ` [PATCH net-next V4 6/6] net/mlx5: Apply devlink eswitch mode boot default on probe Mark Bloch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260629182102.245150-5-mbloch@nvidia.com \
--to=mbloch@nvidia.com \
--cc=andrew+netdev@lunn.ch \
--cc=corbet@lwn.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=jiri@resnulli.us \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=skhan@linuxfoundation.org \
--cc=tariqt@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox