public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Andrew Lunn <andrew+netdev@lunn.ch>,
	"David S. Miller" <davem@davemloft.net>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
	Leon Romanovsky <leon@kernel.org>,
	Tariq Toukan <tariqt@nvidia.com>, Mark Bloch <mbloch@nvidia.com>,
	<netdev@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, Gal Pressman <gal@nvidia.com>,
	Moshe Shemesh <moshe@nvidia.com>,
	Alexei Lazar <alazar@nvidia.com>,
	Dragos Tatulea <dtatulea@nvidia.com>
Subject: [PATCH net-next] net/mlx5e: Extend TC max ratelimit
Date: Tue, 3 Feb 2026 09:30:20 +0200	[thread overview]
Message-ID: <20260203073021.1710806-1-tariqt@nvidia.com> (raw)

From: Alexei Lazar <alazar@nvidia.com>

The per-TC rate limit was restricted to 255 Gbps due to the 8-bit
max_bw_value field in the QETC register.
This limit is insufficient for newer, higher-bandwidth NICs.

Extend the rate limit by using the full 16-bit max_bw_value field.
This allows the finer 100Mbps granularity to be used for rates up to
~6.5 Tbps, instead of switching to 1Gbps granularity at higher rates.

The extended range is only used when the device advertises support
via the qetcr_qshr_max_bw_val_msb capability bit in the QCAM register.

Signed-off-by: Alexei Lazar <alazar@nvidia.com>
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Gal Pressman <gal@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/en/dcbnl.h    |  4 ++
 .../ethernet/mellanox/mlx5/core/en_dcbnl.c    | 66 ++++++++++---------
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  4 +-
 .../net/ethernet/mellanox/mlx5/core/port.c    |  4 +-
 4 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
index 2c98a5299df3..6bd959f9083d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -29,6 +29,10 @@ struct mlx5e_dcbx {
 	u32                        cable_len;
 	u32                        xoff;
 	u16                        port_buff_cell_sz;
+
+	/* Upper limit for 100Mbps and 1Gbps in Kbps units */
+	u64                        upper_limit_100mbps;
+	u64                        upper_limit_gbps;
 };
 
 #define MLX5E_MAX_DSCP (64)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index fddf7c207f8e..4b86df6d5b9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -58,6 +58,20 @@ enum {
 	MLX5_DCB_CHG_NO_RESET,
 };
 
+static const struct {
+	int scale;
+	const char *units_str;
+} mlx5e_bw_units[] = {
+	[MLX5_100_MBPS_UNIT] = {
+		.scale = 100,
+		.units_str = "Mbps",
+	},
+	[MLX5_GBPS_UNIT] = {
+		.scale = 1,
+		.units_str = "Gbps",
+	},
+};
+
 #define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg)  && \
 				   MLX5_CAP_QCAM_REG(mdev, qpts) && \
 				   MLX5_CAP_QCAM_REG(mdev, qpdpm))
@@ -559,7 +573,7 @@ static int mlx5e_dcbnl_ieee_getmaxrate(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+	u16 max_bw_value[IEEE_8021QAZ_MAX_TCS];
 	u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
 	int err;
 	int i;
@@ -594,57 +608,41 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv    = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
-	u8 max_bw_value[IEEE_8021QAZ_MAX_TCS];
+	u16 max_bw_value[IEEE_8021QAZ_MAX_TCS];
 	u8 max_bw_unit[IEEE_8021QAZ_MAX_TCS];
-	u64 upper_limit_100mbps;
-	u64 upper_limit_gbps;
 	int i;
-	struct {
-		int scale;
-		const char *units_str;
-	} units[] = {
-		[MLX5_100_MBPS_UNIT] = {
-			.scale = 100,
-			.units_str = "Mbps",
-		},
-		[MLX5_GBPS_UNIT] = {
-			.scale = 1,
-			.units_str = "Gbps",
-		},
-	};
 
 	memset(max_bw_value, 0, sizeof(max_bw_value));
 	memset(max_bw_unit, 0, sizeof(max_bw_unit));
-	upper_limit_100mbps = U8_MAX * MLX5E_100MB_TO_KB;
-	upper_limit_gbps = U8_MAX * MLX5E_1GB_TO_KB;
 
 	for (i = 0; i <= mlx5_max_tc(mdev); i++) {
-		if (!maxrate->tc_maxrate[i]) {
+		u64 rate = maxrate->tc_maxrate[i];
+
+		if (!rate) {
 			max_bw_unit[i]  = MLX5_BW_NO_LIMIT;
 			continue;
 		}
-		if (maxrate->tc_maxrate[i] <= upper_limit_100mbps) {
-			max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
-						  MLX5E_100MB_TO_KB);
+		if (rate <= priv->dcbx.upper_limit_100mbps) {
+			max_bw_value[i] = div_u64(rate, MLX5E_100MB_TO_KB);
 			max_bw_value[i] = max_bw_value[i] ? max_bw_value[i] : 1;
 			max_bw_unit[i]  = MLX5_100_MBPS_UNIT;
-		} else if (maxrate->tc_maxrate[i] <= upper_limit_gbps) {
-			max_bw_value[i] = div_u64(maxrate->tc_maxrate[i],
-						  MLX5E_1GB_TO_KB);
+		} else if (rate <= priv->dcbx.upper_limit_gbps) {
+			max_bw_value[i] = div_u64(rate, MLX5E_1GB_TO_KB);
 			max_bw_unit[i]  = MLX5_GBPS_UNIT;
 		} else {
 			netdev_err(netdev,
 				   "tc_%d maxrate %llu Kbps exceeds limit %llu\n",
-				   i, maxrate->tc_maxrate[i],
-				   upper_limit_gbps);
+				   i, rate, priv->dcbx.upper_limit_gbps);
 			return -EINVAL;
 		}
 	}
 
 	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		u8 unit = max_bw_unit[i];
+
 		netdev_dbg(netdev, "%s: tc_%d <=> max_bw %u %s\n", __func__, i,
-			   max_bw_value[i] * units[max_bw_unit[i]].scale,
-			   units[max_bw_unit[i]].units_str);
+			   max_bw_value[i] * mlx5e_bw_units[unit].scale,
+			   mlx5e_bw_units[unit].units_str);
 	}
 
 	return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
@@ -1268,6 +1266,8 @@ static u16 mlx5e_query_port_buffers_cell_size(struct mlx5e_priv *priv)
 void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
 {
 	struct mlx5e_dcbx *dcbx = &priv->dcbx;
+	bool max_bw_msb_supported;
+	u16 type_max;
 
 	mlx5e_trust_initialize(priv);
 
@@ -1285,5 +1285,11 @@ void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
 	priv->dcbx.port_buff_cell_sz = mlx5e_query_port_buffers_cell_size(priv);
 	priv->dcbx.cable_len = MLX5E_DEFAULT_CABLE_LEN;
 
+	max_bw_msb_supported = MLX5_CAP_QCAM_FEATURE(priv->mdev,
+						     qetcr_qshr_max_bw_val_msb);
+	type_max = max_bw_msb_supported ? U16_MAX : U8_MAX;
+	priv->dcbx.upper_limit_100mbps = type_max * MLX5E_100MB_TO_KB;
+	priv->dcbx.upper_limit_gbps = type_max * MLX5E_1GB_TO_KB;
+
 	mlx5e_ets_init(priv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index e4b0aa16c1d2..b635b423d972 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -345,10 +345,10 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw);
 int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
 				u8 tc, u8 *bw_pct);
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
-				    u8 *max_bw_value,
+				    u16 *max_bw_value,
 				    u8 *max_bw_unit);
 int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
-				   u8 *max_bw_value,
+				   u16 *max_bw_value,
 				   u8 *max_bw_unit);
 int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode);
 int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index e8a0884ea477..9fca591652f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -773,7 +773,7 @@ int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
 }
 
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
-				    u8 *max_bw_value,
+				    u16 *max_bw_value,
 				    u8 *max_bw_units)
 {
 	u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
@@ -796,7 +796,7 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
 }
 
 int mlx5_query_port_ets_rate_limit(struct mlx5_core_dev *mdev,
-				   u8 *max_bw_value,
+				   u16 *max_bw_value,
 				   u8 *max_bw_units)
 {
 	u32 out[MLX5_ST_SZ_DW(qetc_reg)];

base-commit: a22f57757f7e88c890499265c383ecb32900b645
-- 
2.44.0


             reply	other threads:[~2026-02-03  7:31 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-02-03  7:30 Tariq Toukan [this message]
2026-02-03  7:30 ` [PATCH net-next] net/mlx5e: Extend TC max ratelimit using max_bw_value_msb Tariq Toukan
2026-02-03  7:46   ` Tariq Toukan
2026-02-05  5:30   ` patchwork-bot+netdevbpf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260203073021.1710806-1-tariqt@nvidia.com \
    --to=tariqt@nvidia.com \
    --cc=alazar@nvidia.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=davem@davemloft.net \
    --cc=dtatulea@nvidia.com \
    --cc=edumazet@google.com \
    --cc=gal@nvidia.com \
    --cc=kuba@kernel.org \
    --cc=leon@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=mbloch@nvidia.com \
    --cc=moshe@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=saeedm@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox