* [RFC net-next 1/4] devlink: Add infrastructure for boot-time defaults
2026-05-06 12:37 [RFC net-next 0/4] devlink: Add boot-time defaults Mark Bloch
@ 2026-05-06 12:37 ` Mark Bloch
2026-05-06 12:37 ` [RFC net-next 2/4] devlink: Add eswitch mode boot default Mark Bloch
` (3 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Mark Bloch @ 2026-05-06 12:37 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Jonathan Corbet, Shuah Khan, Jiri Pirko, Simon Horman,
Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Andrew Morton, Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
linux-kernel, netdev, linux-rdma
Add generic devlink boot-default infrastructure driven by the
devlink= kernel command line parameter.
The parser stores defaults per devlink handle using the same
bus/device handle format exposed by devlink. Each handle keeps an
ordered list of parsed commands so that defaults can later be applied
in command-line order when the matching devlink instance is initialized.
This commit only adds the generic parsing, storage, duplicate handling
and devl_apply_defaults() API. Concrete default commands are added in
later commits.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
---
include/net/devlink.h | 1 +
net/devlink/core.c | 441 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 442 insertions(+)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index bcd31de1f890..058654d6800f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1622,6 +1622,7 @@ int devl_trylock(struct devlink *devlink);
void devl_unlock(struct devlink *devlink);
void devl_assert_locked(struct devlink *devlink);
bool devl_lock_is_held(struct devlink *devlink);
+int devl_apply_defaults(struct devlink *devlink);
DEFINE_GUARD(devl, struct devlink *, devl_lock(_T), devl_unlock(_T));
struct ib_device;
diff --git a/net/devlink/core.c b/net/devlink/core.c
index eeb6a71f5f56..2421a1f8dbb7 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -4,6 +4,11 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/string.h>
#include <net/genetlink.h>
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
@@ -16,6 +21,418 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_trap_report);
DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+static char *devlink_default;
+static LIST_HEAD(devlink_default_nodes);
+
+struct devlink_default_attr_item {
+ enum devlink_attr attr; /* tags which member of @value is in use */
+ union {
+ enum devlink_eswitch_mode eswitch_mode; /* DEVLINK_ATTR_ESWITCH_MODE */
+ struct {
+ char *name; /* owned copy; freed by devlink_default_attr_free() */
+ char *value; /* owned copy; freed by devlink_default_attr_free() */
+ } param; /* DEVLINK_ATTR_PARAM */
+ } value;
+};
+
+struct devlink_default_cmd_item {
+ struct list_head list; /* entry in devlink_default_node.cmd_list */
+ enum devlink_command cmd; /* compared when checking for duplicates */
+ int (*run)(struct devlink *devlink,
+ const struct devlink_default_attr_item *attr); /* applies @attr */
+ struct devlink_default_attr_item attr; /* parsed argument handed to @run */
+};
+
+struct devlink_default_node {
+ struct list_head list; /* entry in devlink_default_nodes */
+ char *bus_name; /* kstrdup()'d bus part of the handle */
+ char *dev_name; /* kstrdup()'d device part of the handle */
+ struct list_head cmd_list;
+};
+
+struct devlink_default_cmd_spec {
+ const char *name;
+ enum devlink_command cmd;
+ int (*run)(struct devlink *devlink,
+ const struct devlink_default_attr_item *attr);
+ int (*attr_parse)(char *str,
+ struct devlink_default_attr_item *attr_item);
+};
+
+static int __init
+devlink_default_node_add(const char *bus_name, const char *dev_name,
+ const char *cmd);
+
+static void __init
+devlink_default_attr_free(struct devlink_default_attr_item *attr)
+{
+ if (attr->attr != DEVLINK_ATTR_PARAM)
+ return;
+
+ kfree(attr->value.param.name);
+ kfree(attr->value.param.value);
+}
+
+static const struct devlink_default_cmd_spec *__init
+devlink_default_cmd_spec_find(const char *name)
+{
+ return NULL;
+}
+
+static int __init
+devlink_default_cmd_parse(char *str,
+ struct devlink_default_cmd_item *cmd_item)
+{
+ const struct devlink_default_cmd_spec *spec;
+ struct devlink_default_attr_item attr_item = {};
+ char *cmd_name;
+ int err;
+
+ cmd_name = strsep(&str, ":");
+ if (!cmd_name || !*cmd_name || !str || !*str)
+ return -EINVAL;
+
+ spec = devlink_default_cmd_spec_find(cmd_name);
+ if (!spec)
+ return -EINVAL;
+
+ err = spec->attr_parse(str, &attr_item);
+ if (err) {
+ devlink_default_attr_free(&attr_item);
+ return err;
+ }
+ if (cmd_item) {
+ cmd_item->cmd = spec->cmd;
+ cmd_item->run = spec->run;
+ cmd_item->attr = attr_item;
+ } else {
+ devlink_default_attr_free(&attr_item);
+ }
+
+ return 0;
+}
+
+static int __init
+devlink_default_cmd_parse_copy(const char *str,
+ struct devlink_default_cmd_item *cmd_item)
+{
+ char *cmd;
+ int err;
+
+ cmd = kstrdup(str, GFP_KERNEL);
+ if (!cmd)
+ return -ENOMEM;
+
+ err = devlink_default_cmd_parse(cmd, cmd_item);
+ kfree(cmd);
+ return err;
+}
+
+static int __init
+devlink_default_handle_parse(char *handle, char **bus_name, char **dev_name)
+{
+ char *slash;
+ char *p;
+
+ if (!handle || !*handle)
+ return -EINVAL;
+
+ for (p = handle; *p; p++) {
+ if (isspace(*p))
+ return -EINVAL;
+ if (*p == '[' || *p == ']')
+ return -EINVAL;
+ }
+
+ slash = strchr(handle, '/');
+ if (!slash || slash == handle || !slash[1])
+ return -EINVAL;
+ if (strchr(slash + 1, '/'))
+ return -EINVAL;
+
+ *slash = '\0';
+ if (strchr(handle, ':'))
+ return -EINVAL;
+
+ *bus_name = handle;
+ *dev_name = slash + 1;
+ return 0;
+}
+
+static int __init
+devlink_default_entry_parse(char *entry, bool store)
+{
+ char *handles_end;
+ char *handles;
+ char *handle;
+ char *cmd;
+ int err;
+
+ if (!entry || *entry != '[')
+ return -EINVAL;
+
+ handles = entry + 1;
+ handles_end = strchr(handles, ']');
+ if (!handles_end || handles_end[1] != ':' || !handles_end[2])
+ return -EINVAL;
+
+ *handles_end = '\0';
+ cmd = handles_end + 2;
+ if (!*handles)
+ return -EINVAL;
+
+ while ((handle = strsep(&handles, ",")) != NULL) {
+ char *bus_name;
+ char *dev_name;
+
+ err = devlink_default_handle_parse(handle, &bus_name,
+ &dev_name);
+ if (err)
+ return err;
+
+ if (!store)
+ continue;
+
+ err = devlink_default_node_add(bus_name, dev_name, cmd);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void __init
+devlink_default_cmd_item_free(struct devlink_default_cmd_item *cmd)
+{
+ devlink_default_attr_free(&cmd->attr);
+ kfree(cmd);
+}
+
+static void __init devlink_default_node_free(struct devlink_default_node *node)
+{
+ struct devlink_default_cmd_item *cmd;
+ struct devlink_default_cmd_item *cmd_tmp;
+
+ list_for_each_entry_safe(cmd, cmd_tmp, &node->cmd_list, list) {
+ list_del(&cmd->list);
+ devlink_default_cmd_item_free(cmd);
+ }
+
+ kfree(node->bus_name);
+ kfree(node->dev_name);
+ kfree(node);
+}
+
+static void __init devlink_default_nodes_clear(void)
+{
+ struct devlink_default_node *node;
+ struct devlink_default_node *node_tmp;
+
+ list_for_each_entry_safe(node, node_tmp, &devlink_default_nodes, list) {
+ list_del(&node->list);
+ devlink_default_node_free(node);
+ }
+}
+
+static struct devlink_default_node *__init
+devlink_default_node_find(const char *bus_name, const char *dev_name)
+{
+ struct devlink_default_node *node;
+
+ list_for_each_entry(node, &devlink_default_nodes, list) {
+ if (!strcmp(node->bus_name, bus_name) &&
+ !strcmp(node->dev_name, dev_name))
+ return node;
+ }
+
+ return NULL;
+}
+
+static bool __init
+devlink_default_cmd_equal(const struct devlink_default_cmd_item *a,
+ const struct devlink_default_cmd_item *b)
+{
+ if (a->cmd != b->cmd || a->attr.attr != b->attr.attr)
+ return false;
+
+ return true;
+}
+
+static bool __init
+devlink_default_cmd_exists(struct devlink_default_node *node,
+ const struct devlink_default_cmd_item *cmd)
+{
+ struct devlink_default_cmd_item *cmd_item;
+
+ list_for_each_entry(cmd_item, &node->cmd_list, list) {
+ if (devlink_default_cmd_equal(cmd_item, cmd))
+ return true;
+ }
+
+ return false;
+}
+
+static int __init
+devlink_default_cmd_item_add(struct devlink_default_node *node,
+ const char *cmd_str)
+{
+ struct devlink_default_cmd_item *cmd;
+ int err;
+
+ cmd = kzalloc_obj(*cmd);
+ if (!cmd)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&cmd->list);
+ err = devlink_default_cmd_parse_copy(cmd_str, cmd);
+ if (err) {
+ devlink_default_cmd_item_free(cmd);
+ return err;
+ }
+
+ if (devlink_default_cmd_exists(node, cmd)) {
+ devlink_default_cmd_item_free(cmd);
+ return -EEXIST;
+ }
+
+ list_add_tail(&cmd->list, &node->cmd_list);
+ return 0;
+}
+
+static int __init
+devlink_default_node_add(const char *bus_name, const char *dev_name,
+ const char *cmd_str)
+{
+ struct devlink_default_node *node;
+ int err;
+
+ node = devlink_default_node_find(bus_name, dev_name);
+ if (node)
+ return devlink_default_cmd_item_add(node, cmd_str);
+
+ node = kzalloc_obj(*node);
+ if (!node)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->cmd_list);
+ node->bus_name = kstrdup(bus_name, GFP_KERNEL);
+ node->dev_name = kstrdup(dev_name, GFP_KERNEL);
+ if (!node->bus_name || !node->dev_name) {
+ err = -ENOMEM;
+ goto err_free_node;
+ }
+
+ err = devlink_default_cmd_item_add(node, cmd_str);
+ if (err)
+ goto err_free_node;
+
+ list_add_tail(&node->list, &devlink_default_nodes);
+ return 0;
+
+err_free_node:
+ devlink_default_node_free(node);
+ return err;
+}
+
+static int __init devlink_default_parse(char *str, bool store)
+{
+ char *entry = str;
+ int err;
+
+ if (!str || !*str)
+ return -EINVAL;
+
+ while (entry) {
+ char *handles_end;
+ char *cmd_start;
+ char *entry_end;
+
+ if (*entry != '[') {
+ err = -EINVAL;
+ goto err_clear;
+ }
+
+ handles_end = strchr(entry + 1, ']');
+ if (!handles_end || handles_end[1] != ':') {
+ err = -EINVAL;
+ goto err_clear;
+ }
+
+ cmd_start = handles_end + 2;
+ entry_end = strchr(cmd_start, ',');
+ if (entry_end)
+ *entry_end = '\0';
+
+ err = devlink_default_entry_parse(entry, store);
+ if (err)
+ goto err_clear;
+ if (!entry_end)
+ return 0;
+
+ entry = entry_end + 1;
+ if (!*entry) {
+ err = -EINVAL;
+ goto err_clear;
+ }
+ }
+
+ return 0;
+
+err_clear:
+ if (store)
+ devlink_default_nodes_clear();
+ return err;
+}
+
+static int devlink_default_node_apply(struct devlink *devlink,
+ const struct devlink_default_node *node)
+{
+ const struct devlink_default_cmd_item *cmd;
+ int err;
+
+ list_for_each_entry(cmd, &node->cmd_list, list) {
+ err = cmd->run(devlink, &cmd->attr);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * devl_apply_defaults - Apply defaults matching the devlink instance
+ * @devlink: devlink instance; the caller must hold its instance lock
+ *
+ * Return: 0 on success or no match, else the first failing command's error.
+ */
+int devl_apply_defaults(struct devlink *devlink)
+{
+ const char *bus_name = devlink_bus_name(devlink);
+ const char *dev_name = devlink_dev_name(devlink);
+ struct devlink_default_node *node;
+
+ devl_assert_locked(devlink);
+
+ list_for_each_entry(node, &devlink_default_nodes, list) {
+ if (strcmp(node->bus_name, bus_name) ||
+ strcmp(node->dev_name, dev_name))
+ continue;
+
+ return devlink_default_node_apply(devlink, node);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_apply_defaults);
+
+static int __init devlink_default_setup(char *str)
+{
+ devlink_default = str;
+ return 1;
+}
+__setup("devlink=", devlink_default_setup);
+
static struct devlink *devlinks_xa_get(unsigned long index)
{
struct devlink *devlink;
@@ -578,6 +995,27 @@ static int __init devlink_init(void)
{
int err;
+ if (devlink_default) {
+ char *def;
+
+ def = kstrdup(devlink_default, GFP_KERNEL);
+ if (!def) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = devlink_default_parse(def, true);
+ kfree(def);
+ if (err == -EEXIST) {
+ devlink_default = NULL;
+ pr_warn("devlink: duplicate defaults ignored\n");
+ } else if (err == -EINVAL) {
+ devlink_default = NULL;
+ pr_warn("devlink: invalid command line parameter ignored\n");
+ } else if (err) {
+ goto out;
+ }
+ }
+
err = register_pernet_subsys(&devlink_pernet_ops);
if (err)
goto out;
@@ -593,7 +1031,10 @@ static int __init devlink_init(void)
out_unreg_pernet_subsys:
unregister_pernet_subsys(&devlink_pernet_ops);
out:
+ if (err)
+ devlink_default_nodes_clear();
WARN_ON(err);
+
return err;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC net-next 2/4] devlink: Add eswitch mode boot default
2026-05-06 12:37 [RFC net-next 0/4] devlink: Add boot-time defaults Mark Bloch
2026-05-06 12:37 ` [RFC net-next 1/4] devlink: Add infrastructure for " Mark Bloch
@ 2026-05-06 12:37 ` Mark Bloch
2026-05-06 12:37 ` [RFC net-next 3/4] devlink: Add runtime parameter boot defaults Mark Bloch
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: Mark Bloch @ 2026-05-06 12:37 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Jonathan Corbet, Shuah Khan, Jiri Pirko, Simon Horman,
Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Andrew Morton, Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
linux-kernel, netdev, linux-rdma
Add support for configuring the devlink eswitch mode from the
devlink= kernel command line parameter.
The supported syntax is:
devlink=[<handle>]:esw:mode:<mode>
where <mode> is one of legacy, switchdev or switchdev_inactive. The
default is applied through the existing eswitch_mode_set() devlink
operation, matching the userspace devlink eswitch set command.
Document the devlink= syntax and the eswitch mode default.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
---
.../admin-guide/kernel-parameters.txt | 24 ++++
.../networking/devlink/devlink-defaults.rst | 99 +++++++++++++++
Documentation/networking/devlink/index.rst | 1 +
net/devlink/core.c | 114 ++++++++++++++++++
4 files changed, 238 insertions(+)
create mode 100644 Documentation/networking/devlink/devlink-defaults.rst
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7834ee927310..150202882870 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1278,6 +1278,30 @@ Kernel parameters
dell_smm_hwmon.fan_max=
[HW] Maximum configurable fan speed.
+ devlink= [NET]
+ Format:
+ <entry>[,<entry>...]
+
+ <entry>:
+ [<handle>[,<handle>...]]:<cmd>:<cmd-options>
+
+ <handle>:
+ <bus-name>/<dev-name>
+
+ Configure default devlink settings for matching
+ devlink instances during device initialization.
+
+ Currently supported settings:
+ esw:mode:{ legacy | switchdev | switchdev_inactive }
+
+ Examples:
+ devlink=[pci/0000:08:00.0]:esw:mode:switchdev
+ devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:legacy
+ devlink=[pci/0000:08:00.0]:esw:mode:switchdev,[pci/0000:08:00.1]:esw:mode:switchdev_inactive
+
+ See Documentation/networking/devlink/devlink-defaults.rst
+ for the full syntax and duplicate handling rules.
+
dfltcc= [HW,S390]
Format: { on | off | def_only | inf_only | always }
on: s390 zlib hardware support for compression on
diff --git a/Documentation/networking/devlink/devlink-defaults.rst b/Documentation/networking/devlink/devlink-defaults.rst
new file mode 100644
index 000000000000..7d6ccaddca86
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-defaults.rst
@@ -0,0 +1,99 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Devlink Defaults
+================
+
+Devlink defaults allow selected devlink settings to be provided on the
+kernel command line and applied to matching devlink instances during device
+initialization.
+
+The devlink device is selected by its devlink handle. For PCI devices this is
+the same handle shown by ``devlink dev show``, for example
+``pci/0000:08:00.0``.
+
+Kernel command line syntax
+==========================
+
+Defaults are specified with the ``devlink=`` kernel command line parameter.
+
+The general syntax is::
+
+ devlink=<default>[,<default>...]
+
+Each default has the following form::
+
+ [<handle-list>]:<cmd>:<cmd-options>
+
+``<handle-list>`` is one or more devlink handles::
+
+ <bus-name>/<dev-name>[,<bus-name>/<dev-name>...]
+
+All handles in the same ``[]`` list receive the same command setting.
+
+Multiple defaults may be specified by separating complete defaults with a
+comma after the value::
+
+ devlink=[pci/0000:08:00.0]:esw:mode:switchdev,[pci/0000:08:00.1]:esw:mode:legacy
+
+Syntax rules
+------------
+
+The following syntax rules apply:
+
+* Specify all defaults in one ``devlink=`` parameter. Repeated ``devlink=``
+ parameters are not accumulated; only the last one is used.
+* The ``devlink=`` value is limited by the kernel command line size.
+* Whitespace is not allowed within the parameter value.
+* ``<bus-name>`` and ``<dev-name>`` must not be empty.
+* ``<bus-name>`` must not contain ``:``.
+* ``<dev-name>`` may contain ``:``. This allows PCI names such as
+ ``0000:08:00.0``.
+* Handles must not contain whitespace, ``[``, ``]`` or more than one ``/``.
+* A comma inside ``[]`` separates handles.
+* A comma after ``<cmd-options>`` separates defaults.
+* Defaults for the same handle are applied in command-line order.
+* The same ``esw`` attribute may be specified only once for a given devlink
+ handle.
+* Duplicate entries for the same handle are rejected and all devlink defaults
+ are ignored.
+
+Supported defaults
+==================
+
+The supported command is ``esw``:
+
+.. list-table::
+ :widths: 10 25 35
+ :header-rows: 1
+
+ * - Command
+ - Options
+ - Values
+ * - ``esw``
+ - ``mode:<mode>``
+ - ``legacy``, ``switchdev``, ``switchdev_inactive``
+
+The ``esw:mode`` default corresponds to the userspace command::
+
+ devlink dev eswitch set <handle> mode <value>
+
+
+Examples
+========
+
+Set one PCI devlink instance to switchdev mode::
+
+ devlink=[pci/0000:08:00.0]:esw:mode:switchdev
+
+Set two PCI devlink instances to legacy mode::
+
+ devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:legacy
+
+Set different modes for different PCI devlink instances::
+
+ devlink=[pci/0000:08:00.0]:esw:mode:switchdev,[pci/0000:08:00.1]:esw:mode:switchdev_inactive
+
+The following is invalid because the same handle receives ``esw:mode`` twice::
+
+ devlink=[pci/0000:08:00.0]:esw:mode:legacy,[pci/0000:08:00.0]:esw:mode:switchdev
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index f7ba7dcf477d..0d27a7008b14 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -56,6 +56,7 @@ general.
:maxdepth: 1
devlink-dpipe
+ devlink-defaults
devlink-eswitch-attr
devlink-flash
devlink-health
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 2421a1f8dbb7..4b404135181c 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -73,9 +73,123 @@ devlink_default_attr_free(struct devlink_default_attr_item *attr)
kfree(attr->value.param.value);
}
+struct devlink_default_attr_spec {
+ const char *name;
+ enum devlink_attr attr;
+ int (*value_parse)(const char *value,
+ struct devlink_default_attr_item *attr_item);
+};
+
+static int __init
+devlink_default_attr_parse(char *str,
+ const struct devlink_default_attr_spec *attrs,
+ size_t attrs_count,
+ struct devlink_default_attr_item *attr_item)
+{
+ char *attr_name;
+ char *value;
+ size_t i;
+
+ attr_name = strsep(&str, ":");
+ if (!attr_name || !*attr_name || !str || !*str)
+ return -EINVAL;
+
+ value = str;
+ for (i = 0; i < attrs_count; i++) {
+ if (!strcmp(attr_name, attrs[i].name)) {
+ attr_item->attr = attrs[i].attr;
+ return attrs[i].value_parse(value, attr_item);
+ }
+ }
+
+ return -EINVAL;
+}
+
+static int __init
+devlink_default_esw_mode_to_value(const char *str,
+ enum devlink_eswitch_mode *mode)
+{
+ if (!strcmp(str, "legacy")) {
+ *mode = DEVLINK_ESWITCH_MODE_LEGACY;
+ return 0;
+ }
+ if (!strcmp(str, "switchdev")) {
+ *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV;
+ return 0;
+ }
+ if (!strcmp(str, "switchdev_inactive")) {
+ *mode = DEVLINK_ESWITCH_MODE_SWITCHDEV_INACTIVE;
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int __init
+devlink_default_esw_mode_parse(const char *str,
+ struct devlink_default_attr_item *attr_item)
+{
+ enum devlink_eswitch_mode mode;
+ int err;
+
+ err = devlink_default_esw_mode_to_value(str, &mode);
+ if (err)
+ return err;
+
+ attr_item->value.eswitch_mode = mode;
+ return 0;
+}
+
+static const struct devlink_default_attr_spec devlink_default_esw_attrs[] __initconst = {
+ { "mode", DEVLINK_ATTR_ESWITCH_MODE, devlink_default_esw_mode_parse },
+};
+
+static int __init
+devlink_default_esw_attr_parse(char *str,
+ struct devlink_default_attr_item *attr_item)
+{
+ return devlink_default_attr_parse(str, devlink_default_esw_attrs,
+ ARRAY_SIZE(devlink_default_esw_attrs),
+ attr_item);
+}
+
+static int
+devlink_default_eswitch_apply(struct devlink *devlink,
+ const struct devlink_default_attr_item *attr)
+{
+ const struct devlink_ops *ops = devlink->ops;
+
+ switch (attr->attr) {
+ case DEVLINK_ATTR_ESWITCH_MODE:
+ if (!ops->eswitch_mode_set)
+ return -EOPNOTSUPP;
+
+ return ops->eswitch_mode_set(devlink, attr->value.eswitch_mode,
+ NULL);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static const struct devlink_default_cmd_spec devlink_default_cmds[] __initconst = {
+ {
+ .name = "esw",
+ .cmd = DEVLINK_CMD_ESWITCH_SET,
+ .run = devlink_default_eswitch_apply,
+ .attr_parse = devlink_default_esw_attr_parse,
+ },
+};
+
static const struct devlink_default_cmd_spec *__init
devlink_default_cmd_spec_find(const char *name)
{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_default_cmds); i++) {
+ if (!strcmp(name, devlink_default_cmds[i].name))
+ return &devlink_default_cmds[i];
+ }
+
return NULL;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC net-next 3/4] devlink: Add runtime parameter boot defaults
2026-05-06 12:37 [RFC net-next 0/4] devlink: Add boot-time defaults Mark Bloch
2026-05-06 12:37 ` [RFC net-next 1/4] devlink: Add infrastructure for " Mark Bloch
2026-05-06 12:37 ` [RFC net-next 2/4] devlink: Add eswitch mode boot default Mark Bloch
@ 2026-05-06 12:37 ` Mark Bloch
2026-05-06 12:37 ` [RFC net-next 4/4] net/mlx5: Apply devlink boot defaults during init Mark Bloch
2026-05-06 15:22 ` [RFC net-next 0/4] devlink: Add boot-time defaults Jiri Pirko
4 siblings, 0 replies; 7+ messages in thread
From: Mark Bloch @ 2026-05-06 12:37 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Jonathan Corbet, Shuah Khan, Jiri Pirko, Simon Horman,
Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Andrew Morton, Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
linux-kernel, netdev, linux-rdma
Add support for setting devlink device parameters from the devlink=
kernel command line parameter.
The supported syntax is:
devlink=[<handle>]:param:<name>:<value>
Parameter values are parsed according to the registered devlink
parameter type and are applied in runtime configuration mode. Driverinit
and permanent configuration modes are intentionally not part of the
boot-default syntax.
Add a helper that finds a parameter by name, verifies runtime mode
support, converts the string value, runs the parameter validator and
invokes the existing set callback.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
---
.../admin-guide/kernel-parameters.txt | 2 +
.../networking/devlink/devlink-defaults.rst | 18 ++-
net/devlink/core.c | 110 ++++++++++++------
net/devlink/devl_internal.h | 3 +
net/devlink/param.c | 70 +++++++++++
5 files changed, 165 insertions(+), 38 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 150202882870..761ae45b8607 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1293,9 +1293,11 @@ Kernel parameters
Currently supported settings:
esw:mode:{ legacy | switchdev | switchdev_inactive }
+ param:<name>:<value>
Examples:
devlink=[pci/0000:08:00.0]:esw:mode:switchdev
+ devlink=[pci/0000:08:00.0]:param:flow_steering_mode:smfs
devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:legacy
devlink=[pci/0000:08:00.0]:esw:mode:switchdev,[pci/0000:08:00.1]:esw:mode:switchdev_inactive
diff --git a/Documentation/networking/devlink/devlink-defaults.rst b/Documentation/networking/devlink/devlink-defaults.rst
index 7d6ccaddca86..0d4036e59e88 100644
--- a/Documentation/networking/devlink/devlink-defaults.rst
+++ b/Documentation/networking/devlink/devlink-defaults.rst
@@ -55,13 +55,16 @@ The following syntax rules apply:
* Defaults for the same handle are applied in command-line order.
* The same ``esw`` attribute may be specified only once for a given devlink
handle.
+* The same ``param`` name may be specified only once for a given devlink
+ handle.
* Duplicate entries for the same handle are rejected and all devlink defaults
are ignored.
+* Parameter names and values must not contain ``:`` or ``,``.
Supported defaults
==================
-The supported command is ``esw``:
+The supported commands are ``esw`` and ``param``:
.. list-table::
:widths: 10 25 35
@@ -73,11 +76,16 @@ The supported command is ``esw``:
* - ``esw``
- ``mode:<mode>``
- ``legacy``, ``switchdev``, ``switchdev_inactive``
+ * - ``param``
+ - ``<name>:<value>``
+ - ``<value>`` is parsed according to the registered devlink parameter
+ type. Only runtime devlink parameters are supported.
The ``esw:mode`` default corresponds to the userspace command::
devlink dev eswitch set <handle> mode <value>
+The ``param`` default applies the named devlink parameter in runtime mode.
Examples
========
@@ -90,6 +98,10 @@ Set two PCI devlink instances to legacy mode::
devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:legacy
+Set a runtime devlink device parameter::
+
+ devlink=[pci/0000:08:00.0]:param:flow_steering_mode:smfs
+
Set different modes for different PCI devlink instances::
devlink=[pci/0000:08:00.0]:esw:mode:switchdev,[pci/0000:08:00.1]:esw:mode:switchdev_inactive
@@ -97,3 +109,7 @@ Set different modes for different PCI devlink instances::
The following is invalid because the same handle receives ``esw:mode`` twice::
devlink=[pci/0000:08:00.0]:esw:mode:legacy,[pci/0000:08:00.0]:esw:mode:switchdev
+
+The following is invalid because the same handle receives ``param:x`` twice::
+
+ devlink=[pci/0]:param:x:1,[pci/0]:param:x:2
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 4b404135181c..22990793ab8c 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -50,6 +50,13 @@ struct devlink_default_node {
struct list_head cmd_list;
};
+struct devlink_default_attr_spec {
+ const char *name;
+ enum devlink_attr attr;
+ int (*value_parse)(const char *value,
+ struct devlink_default_attr_item *attr_item);
+};
+
struct devlink_default_cmd_spec {
const char *name;
enum devlink_command cmd;
@@ -73,13 +80,6 @@ devlink_default_attr_free(struct devlink_default_attr_item *attr)
kfree(attr->value.param.value);
}
-struct devlink_default_attr_spec {
- const char *name;
- enum devlink_attr attr;
- int (*value_parse)(const char *value,
- struct devlink_default_attr_item *attr_item);
-};
-
static int __init
devlink_default_attr_parse(char *str,
const struct devlink_default_attr_spec *attrs,
@@ -153,6 +153,33 @@ devlink_default_esw_attr_parse(char *str,
attr_item);
}
+static int __init
+devlink_default_param_attr_parse(char *str,
+ struct devlink_default_attr_item *attr_item)
+{
+ char *name;
+ char *value;
+
+ attr_item->attr = DEVLINK_ATTR_PARAM;
+
+ name = strsep(&str, ":");
+ value = strsep(&str, ":");
+ if (!name || !*name || !value || !*value || str)
+ return -EINVAL;
+
+ attr_item->value.param.name = kstrdup(name, GFP_KERNEL);
+ if (!attr_item->value.param.name)
+ return -ENOMEM;
+ attr_item->value.param.value = kstrdup(value, GFP_KERNEL);
+ if (!attr_item->value.param.value) {
+ kfree(attr_item->value.param.name);
+ attr_item->value.param.name = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int
devlink_default_eswitch_apply(struct devlink *devlink,
const struct devlink_default_attr_item *attr)
@@ -171,6 +198,17 @@ devlink_default_eswitch_apply(struct devlink *devlink,
}
}
+static int
+devlink_default_param_apply(struct devlink *devlink,
+ const struct devlink_default_attr_item *attr)
+{
+ if (attr->attr != DEVLINK_ATTR_PARAM)
+ return -EOPNOTSUPP;
+
+ return devlink_param_set_from_string(devlink, attr->value.param.name,
+ attr->value.param.value);
+}
+
static const struct devlink_default_cmd_spec devlink_default_cmds[] __initconst = {
{
.name = "esw",
@@ -178,52 +216,46 @@ static const struct devlink_default_cmd_spec devlink_default_cmds[] __initconst
.run = devlink_default_eswitch_apply,
.attr_parse = devlink_default_esw_attr_parse,
},
+ {
+ .name = "param",
+ .cmd = DEVLINK_CMD_PARAM_SET,
+ .run = devlink_default_param_apply,
+ .attr_parse = devlink_default_param_attr_parse,
+ },
};
-static const struct devlink_default_cmd_spec *__init
-devlink_default_cmd_spec_find(const char *name)
-{
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(devlink_default_cmds); i++) {
- if (!strcmp(name, devlink_default_cmds[i].name))
- return &devlink_default_cmds[i];
- }
-
- return NULL;
-}
-
static int __init
devlink_default_cmd_parse(char *str,
struct devlink_default_cmd_item *cmd_item)
{
- const struct devlink_default_cmd_spec *spec;
struct devlink_default_attr_item attr_item = {};
char *cmd_name;
int err;
+ size_t i;
cmd_name = strsep(&str, ":");
if (!cmd_name || !*cmd_name || !str || !*str)
return -EINVAL;
- spec = devlink_default_cmd_spec_find(cmd_name);
- if (!spec)
- return -EINVAL;
-
- err = spec->attr_parse(str, &attr_item);
- if (err) {
- devlink_default_attr_free(&attr_item);
- return err;
- }
- if (cmd_item) {
- cmd_item->cmd = spec->cmd;
- cmd_item->run = spec->run;
- cmd_item->attr = attr_item;
- } else {
- devlink_default_attr_free(&attr_item);
+ for (i = 0; i < ARRAY_SIZE(devlink_default_cmds); i++) {
+ if (!strcmp(cmd_name, devlink_default_cmds[i].name)) {
+ err = devlink_default_cmds[i].attr_parse(str, &attr_item);
+ if (err) {
+ devlink_default_attr_free(&attr_item);
+ return err;
+ }
+ if (cmd_item) {
+ cmd_item->cmd = devlink_default_cmds[i].cmd;
+ cmd_item->run = devlink_default_cmds[i].run;
+ cmd_item->attr = attr_item;
+ } else {
+ devlink_default_attr_free(&attr_item);
+ }
+ return 0;
+ }
}
- return 0;
+ return -EINVAL;
}
static int __init
@@ -369,6 +401,10 @@ devlink_default_cmd_equal(const struct devlink_default_cmd_item *a,
if (a->cmd != b->cmd || a->attr.attr != b->attr.attr)
return false;
+ if (a->cmd == DEVLINK_CMD_PARAM_SET)
+ return !strcmp(a->attr.value.param.name,
+ b->attr.value.param.name);
+
return true;
}
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index e4e48ee2da5a..bde333c22f18 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -212,6 +212,9 @@ static inline int devlink_nl_put_u64(struct sk_buff *msg, int attrtype, u64 val)
int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net,
struct devlink *devlink, int attrtype);
int devlink_nl_msg_reply_and_new(struct sk_buff **msg, struct genl_info *info);
+int devlink_param_set_from_string(struct devlink *devlink,
+ const char *param_name,
+ const char *value_str);
static inline bool devlink_nl_notify_need(struct devlink *devlink)
{
diff --git a/net/devlink/param.c b/net/devlink/param.c
index cf95268da5b0..d2604fe2eee5 100644
--- a/net/devlink/param.c
+++ b/net/devlink/param.c
@@ -4,6 +4,8 @@
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
*/
+#include <linux/kstrtox.h>
+
#include "devl_internal.h"
static const struct devlink_param devlink_param_generic[] = {
@@ -551,6 +553,74 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
return 0;
}
+static int
+devlink_param_value_get_from_str(const struct devlink_param *param,
+ const char *value_str,
+ union devlink_param_value *value)
+{
+ switch (param->type) {
+ case DEVLINK_PARAM_TYPE_U8:
+ return kstrtou8(value_str, 0, &value->vu8);
+ case DEVLINK_PARAM_TYPE_U16:
+ return kstrtou16(value_str, 0, &value->vu16);
+ case DEVLINK_PARAM_TYPE_U32:
+ return kstrtou32(value_str, 0, &value->vu32);
+ case DEVLINK_PARAM_TYPE_U64:
+ return kstrtou64(value_str, 0, &value->vu64);
+ case DEVLINK_PARAM_TYPE_STRING:
+ if (strscpy(value->vstr, value_str, sizeof(value->vstr)) < 0)
+ return -EINVAL;
+ return 0;
+ case DEVLINK_PARAM_TYPE_BOOL:
+ return kstrtobool(value_str, &value->vbool);
+ }
+
+ return -EINVAL;
+}
+
+int devlink_param_set_from_string(struct devlink *devlink,
+ const char *param_name,
+ const char *value_str)
+{
+ struct devlink_param_gset_ctx ctx;
+ struct devlink_param_item *param_item;
+ const struct devlink_param *param;
+ union devlink_param_value value;
+ int err;
+
+ devl_assert_locked(devlink);
+
+ param_item = devlink_param_find_by_name(&devlink->params, param_name);
+ if (!param_item)
+ return -EINVAL;
+ param = param_item->param;
+
+ if (!devlink_param_cmode_is_supported(param,
+ DEVLINK_PARAM_CMODE_RUNTIME))
+ return -EOPNOTSUPP;
+ if (!param->set)
+ return -EOPNOTSUPP;
+
+ err = devlink_param_value_get_from_str(param, value_str, &value);
+ if (err)
+ return err;
+
+ if (param->validate) {
+ err = param->validate(devlink, param->id, value, NULL);
+ if (err)
+ return err;
+ }
+
+ ctx.val = value;
+ ctx.cmode = DEVLINK_PARAM_CMODE_RUNTIME;
+ err = devlink_param_set(devlink, param, &ctx, NULL);
+ if (err)
+ return err;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+ return 0;
+}
+
static struct devlink_param_item *
devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [RFC net-next 4/4] net/mlx5: Apply devlink boot defaults during init
2026-05-06 12:37 [RFC net-next 0/4] devlink: Add boot-time defaults Mark Bloch
` (2 preceding siblings ...)
2026-05-06 12:37 ` [RFC net-next 3/4] devlink: Add runtime parameter boot defaults Mark Bloch
@ 2026-05-06 12:37 ` Mark Bloch
2026-05-06 15:22 ` [RFC net-next 0/4] devlink: Add boot-time defaults Jiri Pirko
4 siblings, 0 replies; 7+ messages in thread
From: Mark Bloch @ 2026-05-06 12:37 UTC (permalink / raw)
To: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller
Cc: Jonathan Corbet, Shuah Khan, Jiri Pirko, Simon Horman,
Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Mark Bloch,
Andrew Morton, Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
linux-kernel, netdev, linux-rdma
Apply devlink boot defaults for mlx5 devices after successful device
initialization, while holding the devlink instance lock.
At this point the devlink instance is registered and the mlx5 devlink
operations and parameters have been registered, so generic devlink
defaults such as eswitch mode and runtime parameters can be applied to
the matching PCI devlink handle.
Signed-off-by: Mark Bloch <mbloch@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index b1b9ebfd3866..a119d199f9a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1464,6 +1464,8 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
err = mlx5_init_one_devl_locked(dev);
if (err)
devl_unregister(devlink);
+ else
+ devl_apply_defaults(devlink);
unlock:
devl_unlock(devlink);
return err;
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC net-next 0/4] devlink: Add boot-time defaults
2026-05-06 12:37 [RFC net-next 0/4] devlink: Add boot-time defaults Mark Bloch
` (3 preceding siblings ...)
2026-05-06 12:37 ` [RFC net-next 4/4] net/mlx5: Apply devlink boot defaults during init Mark Bloch
@ 2026-05-06 15:22 ` Jiri Pirko
2026-05-06 17:35 ` Mark Bloch
4 siblings, 1 reply; 7+ messages in thread
From: Jiri Pirko @ 2026-05-06 15:22 UTC (permalink / raw)
To: Mark Bloch
Cc: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller, Jonathan Corbet, Shuah Khan, Simon Horman,
Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Andrew Morton,
Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
linux-kernel, netdev, linux-rdma
Wed, May 06, 2026 at 02:37:35PM +0200, mbloch@nvidia.com wrote:
>This series adds a devlink= kernel command line parameter for applying
>selected devlink settings during device initialization.
>
>Following a discussion with Jakub[1], I am sending this RFC to get the
>conversation moving. I started from Jakub's example/request and extended
>it to cover requirements from production systems and configurations that
>customers use.
>
>One important caveat is that the parsing logic in this RFC was written
>with AI assistance. I am also not sure whether the resulting syntax and
>parser are too complex for a kernel command line interface. This is part
>of why I am sending it as an RFC: to understand what direction and level
>of complexity would be acceptable to people.
>
>The implementation is intended to support the following properties:
>
>- A system may have multiple devlink devices that usually need the same
> configuration. For a configuration such as eswitch mode switchdev, a
> user should be able to specify multiple devices to which that
> configuration applies.
>
>- There may be ordering dependencies between options. For example, in
> mlx5, flow_steering_mode should be set before moving to switchdev.
> With this in mind, defaults are applied per device in the left-to-right
> order in which they appear on the command line.
>
>The intent is to let deployments set devlink defaults before normal
>userspace orchestration runs, while still using devlink concepts and
"defaults before normal userspace orchestration". I read it as config
before config, which eventually could be skipped.
>driver callbacks rather than adding driver-specific module parameters.
>A default is scoped to one or more devlink handles, for example:
>
> devlink=[pci/0000:08:00.0]:esw:mode:switchdev
> devlink=[pci/0000:08:00.0]:param:flow_steering_mode:smfs
> devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:param:flow_steering_mode:hmfs,[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:switchdev
I don't like this. What you do, you are basically introducing user
configuration tool on kernel cmdline.
The same you would achieve with a proper userspace tool/daemon.
I did try to come up with it and push it here:
https://github.com/systemd/systemd/pull/37393
That didn't get merged for unknown reason, but the idea is sound. You
provide configuration files for devlink object and systemd-devlinkd
will apply when they appear. Wouldn't this help your case?
[..]
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC net-next 0/4] devlink: Add boot-time defaults
2026-05-06 15:22 ` [RFC net-next 0/4] devlink: Add boot-time defaults Jiri Pirko
@ 2026-05-06 17:35 ` Mark Bloch
0 siblings, 0 replies; 7+ messages in thread
From: Mark Bloch @ 2026-05-06 17:35 UTC (permalink / raw)
To: Jiri Pirko
Cc: Eric Dumazet, Jakub Kicinski, Paolo Abeni, Andrew Lunn,
David S. Miller, Jonathan Corbet, Shuah Khan, Simon Horman,
Saeed Mahameed, Leon Romanovsky, Tariq Toukan, Andrew Morton,
Borislav Petkov (AMD), Randy Dunlap, Dave Hansen,
Christian Brauner, Petr Mladek, Peter Zijlstra (Intel),
Thomas Gleixner, Pawan Gupta, Dapeng Mi, Kees Cook, Marco Elver,
Eric Biggers, Li RongQing, Paul E. McKenney, linux-doc,
linux-kernel, netdev, linux-rdma
On 06/05/2026 18:22, Jiri Pirko wrote:
> Wed, May 06, 2026 at 02:37:35PM +0200, mbloch@nvidia.com wrote:
>> This series adds a devlink= kernel command line parameter for applying
>> selected devlink settings during device initialization.
>>
>> Following a discussion with Jakub[1], I am sending this RFC to get the
>> conversation moving. I started from Jakub's example/request and extended
>> it to cover requirements from production systems and configurations that
>> customers use.
>>
>> One important caveat is that the parsing logic in this RFC was written
>> with AI assistance. I am also not sure whether the resulting syntax and
>> parser are too complex for a kernel command line interface. This is part
>> of why I am sending it as an RFC: to understand what direction and level
>> of complexity would be acceptable to people.
>>
>> The implementation is intended to support the following properties:
>>
>> - A system may have multiple devlink devices that usually need the same
>> configuration. For a configuration such as eswitch mode switchdev, a
>> user should be able to specify multiple devices to which that
>> configuration applies.
>>
>> - There may be ordering dependencies between options. For example, in
>> mlx5, flow_steering_mode should be set before moving to switchdev.
>> With this in mind, defaults are applied per device in the left-to-right
>> order in which they appear on the command line.
>>
>> The intent is to let deployments set devlink defaults before normal
>> userspace orchestration runs, while still using devlink concepts and
>
> "defaults before normal userspace orchestration". I read it as config
> before config, which eventually could be skipped.
>
>
>> driver callbacks rather than adding driver-specific module parameters.
>> A default is scoped to one or more devlink handles, for example:
>>
>> devlink=[pci/0000:08:00.0]:esw:mode:switchdev
>> devlink=[pci/0000:08:00.0]:param:flow_steering_mode:smfs
>> devlink=[pci/0000:08:00.0,pci/0000:08:00.1]:param:flow_steering_mode:hmfs,[pci/0000:08:00.0,pci/0000:08:00.1]:esw:mode:switchdev
>
> I don't like this. What you do, you are basically introducing user
> configuration tool on kernel cmdline.
>
> The same you would achieve with a proper userspace tool/daemon.
> I did try to come up with it and push it here:
> https://github.com/systemd/systemd/pull/37393
> That didn't get merged for unknown reason, but the idea is sound. You
> provide configuration files for devlink object and systemd-devlinkd
> will apply when they appear. Wouldn't this help your case?
I agree that systemd-devlinkd is the right shape for normal
devlink configuration, and it could probably replace the udev/devlink
plumbing we use today.
The case I am trying to cover is earlier than that.
On BlueField/ECPF/DPU systems, the host PF driver cannot always finish
probing independently of the ECPF side. When the ECPF is the eswitch
manager, the host PF is kept in initializing state until the ECPF eswitch
side is set up and mlx5 enables the external host PF HCA. That happens as
part of moving the ECPF to switchdev.
Today userspace observes the ECPF instance and then switches the
mode through devlink, usually via udev or similar plumbing. That still
leaves a window where the ECPF has probed, userspace has not applied the
mode yet, and the host PF is waiting. With many ECPFs this becomes visible
in host PF probe/boot time. A daemon reacting to the devlink object
appearing can make the userspace side cleaner, but it still runs after the
device has appeared and after userspace scheduling/uevent handling.
Long term, for these DPU deployments, we would like mlx5 to initialize
directly in switchdev. I am hesitant to make that unconditional because it
changes existing behavior and there is no early opt-out before probe. The
cmdline parameter was meant as an explicit opt-in middle step: ask the
driver to apply the same devlink operation during init, before this path
depends on userspace.
We previously tried to address this with an mlx5 module parameter. By
design, that was too coarse: it applied to all mlx5 devices handled by the
module. That makes it usable only for narrow DPU-only configurations. The
devlink-handle based cmdline syntax was intended to keep the opt-in scoped
to the specific devices that need this early switchdev transition.
Mark
>
> [..]
^ permalink raw reply [flat|nested] 7+ messages in thread