From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org, Dmytro Linkin <dlinkin@nvidia.com>,
Vlad Buslov <vladbu@nvidia.com>, Huy Nguyen <huyn@nvidia.com>,
Mark Bloch <mbloch@nvidia.com>, Parav Pandit <parav@nvidia.com>,
Saeed Mahameed <saeedm@nvidia.com>
Subject: [net-next 14/15] net/mlx5: E-switch, Allow to add vports to rate groups
Date: Thu, 19 Aug 2021 21:55:14 -0700
Message-ID: <20210820045515.265297-15-saeed@kernel.org>
In-Reply-To: <20210820045515.265297-1-saeed@kernel.org>
From: Dmytro Linkin <dlinkin@nvidia.com>

Implement an eswitch API that allows moving a vport to a different rate
group. If the group pointer is NULL, the vport is moved to the internal
unlimited group zero.

Implement the devlink_ops->rate_leaf_parent_set() callback in terms of
the new eswitch group update API.

Enable QoS for all of a group's elements if the group has an allocated
BW share.

Co-developed-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Signed-off-by: Dmytro Linkin <dlinkin@nvidia.com>
Reviewed-by: Huy Nguyen <huyn@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Reviewed-by: Parav Pandit <parav@nvidia.com>
Reviewed-by: Saeed Mahameed <saeedm@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
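Note for reviewers: a rough sketch of how the new callback is exercised from
userspace via the devlink rate CLI. The PCI device name, port index, group
name and rate value below are hypothetical examples only, not taken from
this series; setting or clearing the parent of a leaf is what ends up in
mlx5_esw_devlink_rate_parent_set() -> mlx5_esw_qos_vport_update_group().

  # Create a rate group (node) and give it a tx_max limit:
  $ devlink port function rate add pci/0000:06:00.0/group1
  $ devlink port function rate set pci/0000:06:00.0/group1 tx_max 5gbit

  # Attach a VF vport (leaf) to the group:
  $ devlink port function rate set pci/0000:06:00.0/1 parent group1

  # Detach the vport; a NULL group moves it back to internal group zero:
  $ devlink port function rate set pci/0000:06:00.0/1 noparent
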
.../net/ethernet/mellanox/mlx5/core/devlink.c | 1 +
.../mellanox/mlx5/core/esw/devlink_port.c | 8 +-
.../net/ethernet/mellanox/mlx5/core/esw/qos.c | 206 ++++++++++++++++--
.../net/ethernet/mellanox/mlx5/core/esw/qos.h | 4 +
.../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +
5 files changed, 199 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index e41b7d7cf654..e84287ffc7ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -299,6 +299,7 @@ static const struct devlink_ops mlx5_devlink_ops = {
.rate_node_tx_max_set = mlx5_esw_devlink_rate_node_tx_max_set,
.rate_node_new = mlx5_esw_devlink_rate_node_new,
.rate_node_del = mlx5_esw_devlink_rate_node_del,
+ .rate_leaf_parent_set = mlx5_esw_devlink_rate_parent_set,
#endif
#ifdef CONFIG_MLX5_SF_MANAGER
.port_new = mlx5_devlink_sf_port_new,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index bbfc498cb3dd..20af557ae30c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -116,8 +116,10 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo
if (IS_ERR(vport))
return;
- if (vport->dl_port->devlink_rate)
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
devlink_rate_leaf_destroy(vport->dl_port);
+ }
devlink_port_unregister(vport->dl_port);
mlx5_esw_dl_port_free(vport->dl_port);
@@ -178,8 +180,10 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num
if (IS_ERR(vport))
return;
- if (vport->dl_port->devlink_rate)
+ if (vport->dl_port->devlink_rate) {
+ mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL);
devlink_rate_leaf_destroy(vport->dl_port);
+ }
devlink_port_unregister(vport->dl_port);
vport->dl_port = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 138b11073278..692c9d543f75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -63,20 +63,23 @@ static int esw_qos_vport_config(struct mlx5_eswitch *esw,
struct netlink_ext_ack *extack)
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
void *vport_elem;
int err;
if (!vport->qos.enabled)
return -EIO;
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx,
element_attributes);
MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- err = esw_qos_tsar_config(dev, sched_ctx, esw->qos.root_tsar_ix, vport->qos.esw_tsar_ix,
+ err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix,
max_rate, bw_share);
if (err) {
esw_warn(esw->dev,
@@ -109,7 +112,7 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
} else {
mlx5_esw_for_each_vport(esw, i, evport) {
if (!evport->enabled || !evport->qos.enabled ||
- evport->qos.min_rate < max_guarantee)
+ evport->qos.group != group || evport->qos.min_rate < max_guarantee)
continue;
max_guarantee = evport->qos.min_rate;
}
@@ -117,6 +120,12 @@ static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw,
if (max_guarantee)
return max_t(u32, max_guarantee / fw_max_bw_share, 1);
+
+ /* If vports min rate divider is 0 but their group has bw_share configured, then
+ * need to set bw_share for vports to minimal value.
+ */
+ if (!group_level && !max_guarantee && group->bw_share)
+ return 1;
return 0;
}
@@ -140,7 +149,7 @@ static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw,
int err;
mlx5_esw_for_each_vport(esw, i, evport) {
- if (!evport->enabled || !evport->qos.enabled)
+ if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group)
continue;
bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share);
@@ -176,6 +185,14 @@ static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divid
return err;
group->bw_share = bw_share;
+
+ /* All the group's vports need to be set with default bw_share
+ * to enable them with QOS
+ */
+ err = esw_qos_normalize_vports_min_rate(esw, group, extack);
+
+ if (err)
+ return err;
}
return 0;
@@ -201,7 +218,7 @@ int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw,
previous_min_rate = evport->qos.min_rate;
evport->qos.min_rate = min_rate;
- err = esw_qos_normalize_vports_min_rate(esw, NULL, extack);
+ err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack);
if (err)
evport->qos.min_rate = previous_min_rate;
@@ -213,6 +230,7 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
u32 max_rate,
struct netlink_ext_ack *extack)
{
+ u32 act_max_rate = max_rate;
bool max_rate_supported;
int err;
@@ -224,7 +242,13 @@ int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw,
if (max_rate == evport->qos.max_rate)
return 0;
- err = esw_qos_vport_config(esw, evport, max_rate, evport->qos.bw_share, extack);
+ /* If parent group has rate limit need to set to group
+ * value when new max rate is 0.
+ */
+ if (evport->qos.group && !max_rate)
+ act_max_rate = evport->qos.group->max_rate;
+
+ err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack);
if (!err)
evport->qos.max_rate = max_rate;
@@ -267,6 +291,8 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
struct mlx5_esw_rate_group *group,
u32 max_rate, struct netlink_ext_ack *extack)
{
+ struct mlx5_vport *vport;
+ unsigned long i;
int err;
if (group->max_rate == max_rate)
@@ -278,9 +304,127 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
group->max_rate = max_rate;
+ /* Any unlimited vports in the group should be set
+ * with the value of the group.
+ */
+ mlx5_esw_for_each_vport(esw, i, vport) {
+ if (!vport->enabled || !vport->qos.enabled ||
+ vport->qos.group != group || vport->qos.max_rate)
+ continue;
+
+ err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack);
+ if (err)
+ NL_SET_ERR_MSG_MOD(extack,
+ "E-Switch vport implicit rate limit setting failed");
+ }
+
+ return err;
+}
+
+static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ u32 max_rate, u32 bw_share)
+{
+ u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
+ struct mlx5_esw_rate_group *group = vport->qos.group;
+ struct mlx5_core_dev *dev = esw->dev;
+ u32 parent_tsar_ix;
+ void *vport_elem;
+ int err;
+
+ parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
+ MLX5_SET(scheduling_context, sched_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+ vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
+ MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
+ MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix);
+ MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
+ MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+
+ err = mlx5_create_scheduling_element_cmd(dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ sched_ctx,
+ &vport->qos.esw_tsar_ix);
+ if (err) {
+ esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+ vport->vport, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *curr_group,
+ struct mlx5_esw_rate_group *new_group,
+ struct netlink_ext_ack *extack)
+{
+ u32 max_rate;
+ int err;
+
+ err = mlx5_destroy_scheduling_element_cmd(esw->dev,
+ SCHEDULING_HIERARCHY_E_SWITCH,
+ vport->qos.esw_tsar_ix);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed");
+ return err;
+ }
+
+ vport->qos.group = new_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate;
+
+ /* If vport is unlimited, we set the group's value.
+ * Therefore, if the group is limited it will apply to
+ * the vport as well and if not, vport will remain unlimited.
+ */
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share);
+ if (err) {
+ NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed.");
+ goto err_sched;
+ }
+
+ return 0;
+
+err_sched:
+ vport->qos.group = curr_group;
+ max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate;
+ if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share))
+ esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n",
+ vport->vport);
+
return err;
}
+static int esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *new_group, *curr_group;
+ int err;
+
+ if (!vport->enabled)
+ return -EINVAL;
+
+ curr_group = vport->qos.group;
+ new_group = group ?: esw->qos.group0;
+ if (curr_group == new_group)
+ return 0;
+
+ err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack);
+ if (err)
+ return err;
+
+ /* Recalculate bw share weights of old and new groups */
+ if (vport->qos.bw_share) {
+ esw_qos_normalize_vports_min_rate(esw, curr_group, extack);
+ esw_qos_normalize_vports_min_rate(esw, new_group, extack);
+ }
+
+ return 0;
+}
+
static struct mlx5_esw_rate_group *
esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
@@ -457,9 +601,6 @@ void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw)
int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
u32 max_rate, u32 bw_share)
{
- u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
- struct mlx5_core_dev *dev = esw->dev;
- void *vport_elem;
int err;
lockdep_assert_held(&esw->state_lock);
@@ -469,22 +610,10 @@ int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport
if (vport->qos.enabled)
return -EEXIST;
- MLX5_SET(scheduling_context, sched_ctx, element_type,
- SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
- vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes);
- MLX5_SET(vport_element, vport_elem, vport_number, vport->vport);
- MLX5_SET(scheduling_context, sched_ctx, parent_element_id, esw->qos.root_tsar_ix);
- MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate);
- MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share);
+ vport->qos.group = esw->qos.group0;
- err = mlx5_create_scheduling_element_cmd(dev,
- SCHEDULING_HIERARCHY_E_SWITCH,
- sched_ctx,
- &vport->qos.esw_tsar_ix);
- if (err)
- esw_warn(dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
- vport->vport, err);
- else
+ err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share);
+ if (!err)
vport->qos.enabled = true;
return err;
@@ -497,6 +626,8 @@ void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vpo
lockdep_assert_held(&esw->state_lock);
if (!esw->qos.enabled || !vport->qos.enabled)
return;
+ WARN(vport->qos.group && vport->qos.group != esw->qos.group0,
+ "Disabling QoS on port before detaching it from group");
err = mlx5_destroy_scheduling_element_cmd(esw->dev,
SCHEDULING_HIERARCHY_E_SWITCH,
@@ -696,3 +827,32 @@ int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
mutex_unlock(&esw->state_lock);
return err;
}
+
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ mutex_lock(&esw->state_lock);
+ err = esw_qos_vport_update_group(esw, vport, group, extack);
+ mutex_unlock(&esw->state_lock);
+ return err;
+}
+
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack)
+{
+ struct mlx5_esw_rate_group *group;
+ struct mlx5_vport *vport = priv;
+
+ if (!parent)
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch,
+ vport, NULL, extack);
+
+ group = parent_priv;
+ return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
index b2e301a436bd..28451abe2d2f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -32,6 +32,10 @@ int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv,
struct netlink_ext_ack *extack);
int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv,
struct netlink_ext_ack *extack);
+int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate,
+ struct devlink_rate *parent,
+ void *priv, void *parent_priv,
+ struct netlink_ext_ack *extack);
#endif
#endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index d7cfad168312..2c7444101bb9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -177,6 +177,7 @@ struct mlx5_vport {
u32 bw_share;
u32 min_rate;
u32 max_rate;
+ struct mlx5_esw_rate_group *group;
} qos;
u16 vport;
@@ -356,6 +357,10 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
u16 vport_num, bool setting);
int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport,
u32 max_rate, u32 min_rate);
+int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw,
+ struct mlx5_vport *vport,
+ struct mlx5_esw_rate_group *group,
+ struct netlink_ext_ack *extack);
int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting);
int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting);
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
--
2.31.1