From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>
Cc: Donald Hunter <donald.hunter@gmail.com>,
Jiri Pirko <jiri@resnulli.us>, Jonathan Corbet <corbet@lwn.net>,
Saeed Mahameed <saeedm@nvidia.com>,
"Leon Romanovsky" <leon@kernel.org>,
Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<linux-doc@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
Gal Pressman <gal@nvidia.com>, Moshe Shemesh <moshe@nvidia.com>,
Carolina Jubran <cjubran@nvidia.com>,
Cosmin Ratiu <cratiu@nvidia.com>, Jiri Pirko <jiri@nvidia.com>,
Randy Dunlap <rdunlap@infradead.org>,
Simon Horman <horms@kernel.org>,
Krzysztof Kozlowski <krzk@kernel.org>
Subject: [PATCH net-next V6 06/14] devlink: Decouple rate storage from associated devlink object
Date: Sun, 25 Jan 2026 13:31:55 +0200 [thread overview]
Message-ID: <1769340723-14199-7-git-send-email-tariqt@nvidia.com> (raw)
In-Reply-To: <1769340723-14199-1-git-send-email-tariqt@nvidia.com>
From: Cosmin Ratiu <cratiu@nvidia.com>
Devlink rate leafs and nodes were stored in their respective devlink
objects pointed to by devlink_rate->devlink.
This patch removes that association by introducing the concept of
'rate node devlink', which is where all rates that could link to each
other are stored. For now this is the same as devlink_rate->devlink.
After this patch, the devlink rates stored in this devlink instance
could potentially be from multiple other devlink instances. So all rate
node manipulation code was updated to:
- compare each rate's owning devlink (devlink_rate->devlink) against the
  devlink being operated on during iteration, skipping foreign entries.
- acquire additional locks where needed (a no-op for now).
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
include/net/devlink.h | 2 +
net/devlink/rate.c | 192 +++++++++++++++++++++++++++++++-----------
2 files changed, 143 insertions(+), 51 deletions(-)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index c453faec8ebf..fbb434185a67 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1820,6 +1820,8 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
bool external);
int devl_port_fn_devlink_set(struct devlink_port *devlink_port,
struct devlink *fn_devlink);
+struct devlink *devl_rate_lock(struct devlink *devlink);
+void devl_rate_unlock(struct devlink *devlink);
struct devlink_rate *
devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
struct devlink_rate *parent);
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 0d68b5c477dc..c062fd8a6c36 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -30,13 +30,31 @@ devlink_rate_leaf_get_from_info(struct devlink *devlink, struct genl_info *info)
return devlink_rate ?: ERR_PTR(-ENODEV);
}
+struct devlink *devl_rate_lock(struct devlink *devlink)
+{
+ return devlink;
+}
+
+static struct devlink *
+devl_get_rate_node_instance_locked(struct devlink *devlink)
+{
+ return devlink;
+}
+
+void devl_rate_unlock(struct devlink *devlink)
+{
+}
+
static struct devlink_rate *
devlink_rate_node_get_by_name(struct devlink *devlink, const char *node_name)
{
struct devlink_rate *devlink_rate;
+ struct devlink *rate_devlink;
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- if (devlink_rate_is_node(devlink_rate) &&
+ rate_devlink = devl_get_rate_node_instance_locked(devlink);
+ list_for_each_entry(devlink_rate, &rate_devlink->rate_list, list) {
+ if (devlink_rate->devlink == devlink &&
+ devlink_rate_is_node(devlink_rate) &&
!strcmp(node_name, devlink_rate->name))
return devlink_rate;
}
@@ -190,17 +208,25 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
void devlink_rates_notify_register(struct devlink *devlink)
{
struct devlink_rate *rate_node;
+ struct devlink *rate_devlink;
- list_for_each_entry(rate_node, &devlink->rate_list, list)
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ rate_devlink = devl_rate_lock(devlink);
+ list_for_each_entry(rate_node, &rate_devlink->rate_list, list)
+ if (rate_node->devlink == devlink)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ devl_rate_unlock(devlink);
}
void devlink_rates_notify_unregister(struct devlink *devlink)
{
struct devlink_rate *rate_node;
+ struct devlink *rate_devlink;
- list_for_each_entry_reverse(rate_node, &devlink->rate_list, list)
- devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+ rate_devlink = devl_rate_lock(devlink);
+ list_for_each_entry_reverse(rate_node, &rate_devlink->rate_list, list)
+ if (rate_node->devlink == devlink)
+ devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
+ devl_rate_unlock(devlink);
}
static int
@@ -209,17 +235,20 @@ devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
{
struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_rate *devlink_rate;
+ struct devlink *rate_devlink;
int idx = 0;
int err = 0;
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ rate_devlink = devl_rate_lock(devlink);
+ list_for_each_entry(devlink_rate, &rate_devlink->rate_list, list) {
enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
u32 id = NETLINK_CB(cb->skb).portid;
- if (idx < state->idx) {
+ if (idx < state->idx || devlink_rate->devlink != devlink) {
idx++;
continue;
}
+
err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
cb->nlh->nlmsg_seq, flags, NULL);
if (err) {
@@ -228,6 +257,7 @@ devlink_nl_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
}
idx++;
}
+ devl_rate_unlock(devlink);
return err;
}
@@ -244,23 +274,33 @@ int devlink_nl_rate_get_doit(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
int err;
+ devl_rate_lock(devlink);
devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate))
- return PTR_ERR(devlink_rate);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
+ if (!msg) {
+ err = -ENOMEM;
+ goto unlock;
+ }
err = devlink_nl_rate_fill(msg, devlink_rate, DEVLINK_CMD_RATE_NEW,
info->snd_portid, info->snd_seq, 0,
info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
+ if (err)
+ goto err_fill;
+ devl_rate_unlock(devlink);
return genlmsg_reply(msg, info);
+
+err_fill:
+ nlmsg_free(msg);
+unlock:
+ devl_rate_unlock(devlink);
+ return err;
}
static bool
@@ -590,24 +630,32 @@ int devlink_nl_rate_set_doit(struct sk_buff *skb, struct genl_info *info)
const struct devlink_ops *ops;
int err;
+ devl_rate_lock(devlink);
devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate))
- return PTR_ERR(devlink_rate);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
ops = devlink->ops;
- if (!ops || !devlink_rate_set_ops_supported(ops, info, devlink_rate->type))
- return -EOPNOTSUPP;
+ if (!ops ||
+ !devlink_rate_set_ops_supported(ops, info, devlink_rate->type)) {
+ err = -EOPNOTSUPP;
+ goto unlock;
+ }
err = devlink_nl_rate_set(devlink_rate, ops, info);
if (!err)
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+unlock:
+ devl_rate_unlock(devlink);
return err;
}
int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
{
- struct devlink *devlink = info->user_ptr[0];
+ struct devlink *rate_devlink, *devlink = info->user_ptr[0];
struct devlink_rate *rate_node;
const struct devlink_ops *ops;
int err;
@@ -621,15 +669,21 @@ int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
if (!devlink_rate_set_ops_supported(ops, info, DEVLINK_RATE_TYPE_NODE))
return -EOPNOTSUPP;
+ rate_devlink = devl_rate_lock(devlink);
rate_node = devlink_rate_node_get_from_attrs(devlink, info->attrs);
- if (!IS_ERR(rate_node))
- return -EEXIST;
- else if (rate_node == ERR_PTR(-EINVAL))
- return -EINVAL;
+ if (!IS_ERR(rate_node)) {
+ err = -EEXIST;
+ goto unlock;
+ } else if (rate_node == ERR_PTR(-EINVAL)) {
+ err = -EINVAL;
+ goto unlock;
+ }
rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
- if (!rate_node)
- return -ENOMEM;
+ if (!rate_node) {
+ err = -ENOMEM;
+ goto unlock;
+ }
rate_node->devlink = devlink;
rate_node->type = DEVLINK_RATE_TYPE_NODE;
@@ -648,8 +702,9 @@ int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
goto err_rate_set;
refcount_set(&rate_node->refcnt, 1);
- list_add(&rate_node->list, &devlink->rate_list);
+ list_add(&rate_node->list, &rate_devlink->rate_list);
devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+ devl_rate_unlock(devlink);
return 0;
err_rate_set:
@@ -658,6 +713,8 @@ int devlink_nl_rate_new_doit(struct sk_buff *skb, struct genl_info *info)
kfree(rate_node->name);
err_strdup:
kfree(rate_node);
+unlock:
+ devl_rate_unlock(devlink);
return err;
}
@@ -667,13 +724,17 @@ int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
struct devlink_rate *rate_node;
int err;
+ devl_rate_lock(devlink);
rate_node = devlink_rate_node_get_from_info(devlink, info);
- if (IS_ERR(rate_node))
- return PTR_ERR(rate_node);
+ if (IS_ERR(rate_node)) {
+ err = PTR_ERR(rate_node);
+ goto unlock;
+ }
if (refcount_read(&rate_node->refcnt) > 1) {
NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
- return -EBUSY;
+ err = -EBUSY;
+ goto unlock;
}
devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_DEL);
@@ -684,6 +745,8 @@ int devlink_nl_rate_del_doit(struct sk_buff *skb, struct genl_info *info)
list_del(&rate_node->list);
kfree(rate_node->name);
kfree(rate_node);
+unlock:
+ devl_rate_unlock(devlink);
return err;
}
@@ -692,14 +755,20 @@ int devlink_rates_check(struct devlink *devlink,
struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
+ struct devlink *rate_devlink;
+ int err = 0;
- list_for_each_entry(devlink_rate, &devlink->rate_list, list)
- if (!rate_filter || rate_filter(devlink_rate)) {
+ rate_devlink = devl_rate_lock(devlink);
+ list_for_each_entry(devlink_rate, &rate_devlink->rate_list, list)
+ if (devlink_rate->devlink == devlink &&
+ (!rate_filter || rate_filter(devlink_rate))) {
if (extack)
NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
- return -EBUSY;
+ err = -EBUSY;
+ break;
}
- return 0;
+ devl_rate_unlock(devlink);
+ return err;
}
/**
@@ -716,14 +785,20 @@ devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
struct devlink_rate *parent)
{
struct devlink_rate *rate_node;
+ struct devlink *rate_devlink;
+ rate_devlink = devl_rate_lock(devlink);
rate_node = devlink_rate_node_get_by_name(devlink, node_name);
- if (!IS_ERR(rate_node))
- return ERR_PTR(-EEXIST);
+ if (!IS_ERR(rate_node)) {
+ rate_node = ERR_PTR(-EEXIST);
+ goto unlock;
+ }
rate_node = kzalloc(sizeof(*rate_node), GFP_KERNEL);
- if (!rate_node)
- return ERR_PTR(-ENOMEM);
+ if (!rate_node) {
+ rate_node = ERR_PTR(-ENOMEM);
+ goto unlock;
+ }
if (parent) {
rate_node->parent = parent;
@@ -737,12 +812,15 @@ devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
rate_node->name = kstrdup(node_name, GFP_KERNEL);
if (!rate_node->name) {
kfree(rate_node);
- return ERR_PTR(-ENOMEM);
+ rate_node = ERR_PTR(-ENOMEM);
+ goto unlock;
}
refcount_set(&rate_node->refcnt, 1);
- list_add(&rate_node->list, &devlink->rate_list);
+ list_add(&rate_node->list, &rate_devlink->rate_list);
devlink_rate_notify(rate_node, DEVLINK_CMD_RATE_NEW);
+unlock:
+ devl_rate_unlock(devlink);
return rate_node;
}
EXPORT_SYMBOL_GPL(devl_rate_node_create);
@@ -758,10 +836,10 @@ EXPORT_SYMBOL_GPL(devl_rate_node_create);
int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
struct devlink_rate *parent)
{
- struct devlink *devlink = devlink_port->devlink;
+ struct devlink *rate_devlink, *devlink = devlink_port->devlink;
struct devlink_rate *devlink_rate;
- devl_assert_locked(devlink_port->devlink);
+ devl_assert_locked(devlink);
if (WARN_ON(devlink_port->devlink_rate))
return -EBUSY;
@@ -770,6 +848,7 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
if (!devlink_rate)
return -ENOMEM;
+ rate_devlink = devl_rate_lock(devlink);
if (parent) {
devlink_rate->parent = parent;
refcount_inc(&devlink_rate->parent->refcnt);
@@ -779,9 +858,10 @@ int devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
devlink_rate->devlink = devlink;
devlink_rate->devlink_port = devlink_port;
devlink_rate->priv = priv;
- list_add_tail(&devlink_rate->list, &devlink->rate_list);
+ list_add_tail(&devlink_rate->list, &rate_devlink->rate_list);
devlink_port->devlink_rate = devlink_rate;
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_NEW);
+ devl_rate_unlock(devlink);
return 0;
}
@@ -797,16 +877,19 @@ EXPORT_SYMBOL_GPL(devl_rate_leaf_create);
void devl_rate_leaf_destroy(struct devlink_port *devlink_port)
{
struct devlink_rate *devlink_rate = devlink_port->devlink_rate;
+ struct devlink *devlink = devlink_port->devlink;
- devl_assert_locked(devlink_port->devlink);
+ devl_assert_locked(devlink);
if (!devlink_rate)
return;
+ devl_rate_lock(devlink);
devlink_rate_notify(devlink_rate, DEVLINK_CMD_RATE_DEL);
if (devlink_rate->parent)
refcount_dec(&devlink_rate->parent->refcnt);
list_del(&devlink_rate->list);
devlink_port->devlink_rate = NULL;
+ devl_rate_unlock(devlink);
kfree(devlink_rate);
}
EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
@@ -815,18 +898,22 @@ EXPORT_SYMBOL_GPL(devl_rate_leaf_destroy);
* devl_rate_nodes_destroy - destroy all devlink rate nodes on device
* @devlink: devlink instance
*
- * Unset parent for all rate objects and destroy all rate nodes
- * on specified device.
+ * Unset parent for all rate objects involving this device and destroy all rate
+ * nodes on it.
*/
void devl_rate_nodes_destroy(struct devlink *devlink)
{
const struct devlink_ops *ops = devlink->ops;
struct devlink_rate *devlink_rate, *tmp;
+ struct devlink *rate_devlink;
devl_assert_locked(devlink);
+ rate_devlink = devl_rate_lock(devlink);
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- if (!devlink_rate->parent)
+ list_for_each_entry(devlink_rate, &rate_devlink->rate_list, list) {
+ if (!devlink_rate->parent ||
+ (devlink_rate->devlink != devlink &&
+ devlink_rate->parent->devlink != devlink))
continue;
if (devlink_rate_is_leaf(devlink_rate))
@@ -839,13 +926,16 @@ void devl_rate_nodes_destroy(struct devlink *devlink)
refcount_dec(&devlink_rate->parent->refcnt);
devlink_rate->parent = NULL;
}
- list_for_each_entry_safe(devlink_rate, tmp, &devlink->rate_list, list) {
- if (devlink_rate_is_node(devlink_rate)) {
+ list_for_each_entry_safe(devlink_rate, tmp, &rate_devlink->rate_list,
+ list) {
+ if (devlink_rate->devlink == devlink &&
+ devlink_rate_is_node(devlink_rate)) {
ops->rate_node_del(devlink_rate, devlink_rate->priv, NULL);
list_del(&devlink_rate->list);
kfree(devlink_rate->name);
kfree(devlink_rate);
}
}
+ devl_rate_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy);
--
2.40.1
next prev parent reply other threads:[~2026-01-25 11:33 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-01-25 11:31 [PATCH net-next V6 00/14] devlink and mlx5: Support cross-function rate scheduling Tariq Toukan
2026-01-25 11:31 ` [PATCH net-next V6 01/14] documentation: networking: add shared devlink documentation Tariq Toukan
2026-01-27 13:32 ` [net-next,V6,01/14] " Simon Horman
2026-01-25 11:31 ` [PATCH net-next V6 02/14] devlink: introduce shared devlink instance for PFs on same chip Tariq Toukan
2026-01-25 11:31 ` [PATCH net-next V6 03/14] devlink: Reverse locking order for nested instances Tariq Toukan
2026-01-25 11:31 ` [PATCH net-next V6 04/14] devlink: Add helpers to lock nested-in instances Tariq Toukan
2026-01-25 11:31 ` [PATCH net-next V6 05/14] devlink: Refactor devlink_rate_nodes_check Tariq Toukan
2026-01-25 11:31 ` Tariq Toukan [this message]
2026-01-25 11:31 ` [PATCH net-next V6 07/14] devlink: Add parent dev to devlink API Tariq Toukan
2026-01-27 13:49 ` Simon Horman
2026-01-27 14:25 ` Cosmin Ratiu
2026-01-28 9:20 ` Simon Horman
2026-01-25 11:31 ` [PATCH net-next V6 08/14] devlink: Allow parent dev for rate-set and rate-new Tariq Toukan
2026-01-25 11:31 ` [PATCH net-next V6 09/14] devlink: Allow rate node parents from other devlinks Tariq Toukan
2026-01-25 11:31 ` [PATCH net-next V6 10/14] net/mlx5: Add a shared devlink instance for PFs on same chip Tariq Toukan
2026-01-25 11:32 ` [PATCH net-next V6 11/14] net/mlx5: Expose a function to clear a vport's parent Tariq Toukan
2026-01-25 11:32 ` [PATCH net-next V6 12/14] net/mlx5: Store QoS sched nodes in the sh_devlink Tariq Toukan
2026-01-25 11:32 ` [PATCH net-next V6 13/14] net/mlx5: qos: Support cross-device tx scheduling Tariq Toukan
2026-01-25 11:32 ` [PATCH net-next V6 14/14] net/mlx5: Document devlink rates Tariq Toukan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1769340723-14199-7-git-send-email-tariqt@nvidia.com \
--to=tariqt@nvidia.com \
--cc=andrew+netdev@lunn.ch \
--cc=cjubran@nvidia.com \
--cc=corbet@lwn.net \
--cc=cratiu@nvidia.com \
--cc=davem@davemloft.net \
--cc=donald.hunter@gmail.com \
--cc=edumazet@google.com \
--cc=gal@nvidia.com \
--cc=horms@kernel.org \
--cc=jiri@nvidia.com \
--cc=jiri@resnulli.us \
--cc=krzk@kernel.org \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=mbloch@nvidia.com \
--cc=moshe@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=rdunlap@infradead.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox