From: Tariq Toukan <tariqt@nvidia.com>
To: Eric Dumazet <edumazet@google.com>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Andrew Lunn <andrew+netdev@lunn.ch>,
"David S. Miller" <davem@davemloft.net>
Cc: Saeed Mahameed <saeed@kernel.org>, Gal Pressman <gal@nvidia.com>,
"Leon Romanovsky" <leon@kernel.org>,
Saeed Mahameed <saeedm@nvidia.com>,
"Tariq Toukan" <tariqt@nvidia.com>,
Mark Bloch <mbloch@nvidia.com>, Jonathan Corbet <corbet@lwn.net>,
<netdev@vger.kernel.org>, <linux-rdma@vger.kernel.org>,
<linux-doc@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
Dragos Tatulea <dtatulea@nvidia.com>
Subject: [PATCH net-next V2 3/3] net/mlx5e: Make PCIe congestion event thresholds configurable
Date: Thu, 10 Jul 2025 09:51:32 +0300 [thread overview]
Message-ID: <1752130292-22249-4-git-send-email-tariqt@nvidia.com> (raw)
In-Reply-To: <1752130292-22249-1-git-send-email-tariqt@nvidia.com>
From: Dragos Tatulea <dtatulea@nvidia.com>
Add a new sysfs entry for reading and configuring the PCIe congestion
event thresholds. The format is the following:
<inbound_low> <inbound_high> <outbound_low> <outbound_high>
Units are 0.01 %. Accepted values are in range (0, 10000].
Each low threshold must be strictly lower than its high threshold.
When new thresholds are configured, an object modify operation is
issued. The set function is updated accordingly so that it can act as
a modify as well.
The threshold configuration is stored and queried directly
in the firmware.
To guard against fat-fingered input, the values are first parsed as u64
and range-checked before being used as the threshold configuration.
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
.../mellanox/mlx5/core/en/pcie_cong_event.c | 152 +++++++++++++++++-
1 file changed, 144 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
index a24e5465ceeb..a74d1e15c92e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c
@@ -39,9 +39,13 @@ struct mlx5e_pcie_cong_event {
/* For ethtool stats group. */
struct mlx5e_pcie_cong_stats stats;
+
+ struct device_attribute attr;
};
/* In units of 0.01 % */
+#define MLX5E_PCIE_CONG_THRESH_MAX 10000
+
static const struct mlx5e_pcie_cong_thresh default_thresh_config = {
.inbound_high = 9000,
.inbound_low = 7500,
@@ -97,6 +101,7 @@ MLX5E_DEFINE_STATS_GRP(pcie_cong, 0);
static int
mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
const struct mlx5e_pcie_cong_thresh *config,
+ bool modify,
u64 *obj_id)
{
u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
@@ -108,8 +113,16 @@ mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
cong_obj = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, cong_obj);
- MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
- MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ if (!modify) {
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ } else {
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode,
+ MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, *obj_id);
+ MLX5_SET64(pcie_cong_event_obj, cong_obj, modify_select_field,
+ MLX5_PCIE_CONG_EVENT_MOD_THRESH);
+ }
MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type,
MLX5_GENERAL_OBJECT_TYPES_PCIE_CONG_EVENT);
@@ -131,10 +144,12 @@ mlx5_cmd_pcie_cong_event_set(struct mlx5_core_dev *dev,
if (err)
return err;
- *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ if (!modify)
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
- mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) created. Config: in: [%u, %u], out: [%u, %u]\n",
+ mlx5_core_dbg(dev, "PCIe congestion event (obj_id=%llu) %s. Config: in: [%u, %u], out: [%u, %u]\n",
*obj_id,
+ modify ? "modified" : "created",
config->inbound_high, config->inbound_low,
config->outbound_high, config->outbound_low);
@@ -160,13 +175,13 @@ static int mlx5_cmd_pcie_cong_event_destroy(struct mlx5_core_dev *dev,
static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
u64 obj_id,
- u32 *state)
+ u32 *state,
+ struct mlx5e_pcie_cong_thresh *config)
{
u32 in[MLX5_ST_SZ_DW(pcie_cong_event_cmd_in)] = {};
u32 out[MLX5_ST_SZ_DW(pcie_cong_event_cmd_out)];
void *obj;
void *hdr;
- u8 cong;
int err;
hdr = MLX5_ADDR_OF(pcie_cong_event_cmd_in, in, hdr);
@@ -184,6 +199,8 @@ static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
obj = MLX5_ADDR_OF(pcie_cong_event_cmd_out, out, cong_obj);
if (state) {
+ u8 cong;
+
cong = MLX5_GET(pcie_cong_event_obj, obj, inbound_cong_state);
if (cong == MLX5E_CONG_HIGH_STATE)
*state |= MLX5E_INBOUND_CONG;
@@ -193,6 +210,19 @@ static int mlx5_cmd_pcie_cong_event_query(struct mlx5_core_dev *dev,
*state |= MLX5E_OUTBOUND_CONG;
}
+ if (config) {
+ *config = (struct mlx5e_pcie_cong_thresh) {
+ .inbound_low = MLX5_GET(pcie_cong_event_obj, obj,
+ inbound_cong_low_threshold),
+ .inbound_high = MLX5_GET(pcie_cong_event_obj, obj,
+ inbound_cong_high_threshold),
+ .outbound_low = MLX5_GET(pcie_cong_event_obj, obj,
+ outbound_cong_low_threshold),
+ .outbound_high = MLX5_GET(pcie_cong_event_obj, obj,
+ outbound_cong_high_threshold),
+ };
+ }
+
return 0;
}
@@ -210,7 +240,7 @@ static void mlx5e_pcie_cong_event_work(struct work_struct *work)
dev = priv->mdev;
err = mlx5_cmd_pcie_cong_event_query(dev, cong_event->obj_id,
- &new_cong_state);
+ &new_cong_state, NULL);
if (err) {
mlx5_core_warn(dev, "Error %d when querying PCIe cong event object (obj_id=%llu).\n",
err, cong_event->obj_id);
@@ -249,6 +279,101 @@ static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb,
return NOTIFY_OK;
}
+static bool mlx5e_thresh_check_val(u64 val)
+{
+ return val > 0 && val <= MLX5E_PCIE_CONG_THRESH_MAX;
+}
+
+static bool
+mlx5e_thresh_config_check_order(const struct mlx5e_pcie_cong_thresh *config)
+{
+ if (config->inbound_high <= config->inbound_low)
+ return false;
+
+ if (config->outbound_high <= config->outbound_low)
+ return false;
+
+ return true;
+}
+
+#define MLX5E_PCIE_CONG_THRESH_SYSFS_VALUES 4
+
+static ssize_t thresh_config_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct mlx5e_pcie_cong_thresh config = {};
+ struct mlx5e_pcie_cong_event *cong_event;
+ u64 outbound_high, outbound_low;
+ u64 inbound_high, inbound_low;
+ struct mlx5e_priv *priv;
+ int ret;
+ int err;
+
+ cong_event = container_of(attr, struct mlx5e_pcie_cong_event, attr);
+ priv = cong_event->priv;
+
+ ret = sscanf(buf, "%llu %llu %llu %llu",
+ &inbound_low, &inbound_high,
+ &outbound_low, &outbound_high);
+ if (ret != MLX5E_PCIE_CONG_THRESH_SYSFS_VALUES) {
+ mlx5_core_err(priv->mdev, "Invalid format for PCIe congestion threshold configuration. Expected %d, got %d.\n",
+ MLX5E_PCIE_CONG_THRESH_SYSFS_VALUES, ret);
+ return -EINVAL;
+ }
+
+ if (!mlx5e_thresh_check_val(inbound_high) ||
+ !mlx5e_thresh_check_val(inbound_low) ||
+ !mlx5e_thresh_check_val(outbound_high) ||
+ !mlx5e_thresh_check_val(outbound_low)) {
+ mlx5_core_err(priv->mdev, "Invalid values for PCIe congestion threshold configuration. Valid range [1, %d]\n",
+ MLX5E_PCIE_CONG_THRESH_MAX);
+ return -EINVAL;
+ }
+
+ config = (struct mlx5e_pcie_cong_thresh) {
+ .inbound_low = inbound_low,
+ .inbound_high = inbound_high,
+ .outbound_low = outbound_low,
+ .outbound_high = outbound_high,
+
+ };
+
+ if (!mlx5e_thresh_config_check_order(&config)) {
+ mlx5_core_err(priv->mdev, "Invalid order of values for PCIe congestion threshold configuration.\n");
+ return -EINVAL;
+ }
+
+ err = mlx5_cmd_pcie_cong_event_set(priv->mdev, &config,
+ true, &cong_event->obj_id);
+
+ return err ? err : count;
+}
+
+static ssize_t thresh_config_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlx5e_pcie_cong_event *cong_event;
+ struct mlx5e_pcie_cong_thresh config;
+ struct mlx5e_priv *priv;
+ int err;
+
+ cong_event = container_of(attr, struct mlx5e_pcie_cong_event, attr);
+ priv = cong_event->priv;
+
+ err = mlx5_cmd_pcie_cong_event_query(priv->mdev, cong_event->obj_id,
+ NULL, &config);
+
+ if (err)
+ return err;
+
+ return sysfs_emit(buf, "%u %u %u %u\n",
+ config.inbound_low, config.inbound_high,
+ config.outbound_low, config.outbound_high);
+}
+
bool mlx5e_pcie_cong_event_supported(struct mlx5_core_dev *dev)
{
u64 features = MLX5_CAP_GEN_2_64(dev, general_obj_types_127_64);
@@ -283,7 +408,7 @@ int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
cong_event->priv = priv;
err = mlx5_cmd_pcie_cong_event_set(mdev, &default_thresh_config,
- &cong_event->obj_id);
+ false, &cong_event->obj_id);
if (err) {
mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n");
goto err_free;
@@ -295,10 +420,20 @@ int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv)
goto err_obj_destroy;
}
+ cong_event->attr = (struct device_attribute)__ATTR_RW(thresh_config);
+ err = sysfs_create_file(&mdev->device->kobj,
+ &cong_event->attr.attr);
+ if (err) {
+ mlx5_core_warn(mdev, "Error creating a sysfs entry for pcie_cong limits.\n");
+ goto err_unregister_nb;
+ }
+
priv->cong_event = cong_event;
return 0;
+err_unregister_nb:
+ mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
err_obj_destroy:
mlx5_cmd_pcie_cong_event_destroy(mdev, cong_event->obj_id);
err_free:
@@ -316,6 +451,7 @@ void mlx5e_pcie_cong_event_cleanup(struct mlx5e_priv *priv)
return;
priv->cong_event = NULL;
+ sysfs_remove_file(&mdev->device->kobj, &cong_event->attr.attr);
mlx5_eq_notifier_unregister(mdev, &cong_event->nb);
cancel_work_sync(&cong_event->work);
--
2.31.1
next prev parent reply other threads:[~2025-07-10 6:52 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-07-10 6:51 [PATCH net-next V2 0/3] net/mlx5e: Add support for PCIe congestion events Tariq Toukan
2025-07-10 6:51 ` [PATCH net-next V2 1/3] net/mlx5e: Create/destroy PCIe Congestion Event object Tariq Toukan
2025-07-10 6:51 ` [PATCH net-next V2 2/3] net/mlx5e: Add device PCIe congestion ethtool stats Tariq Toukan
2025-07-11 2:06 ` kernel test robot
2025-07-11 23:25 ` Jakub Kicinski
2025-07-12 7:55 ` Dragos Tatulea
2025-07-14 15:26 ` Jakub Kicinski
2025-07-15 13:59 ` Tariq Toukan
2025-07-15 14:18 ` Jakub Kicinski
2025-07-10 6:51 ` Tariq Toukan [this message]
2025-07-11 23:30 ` [PATCH net-next V2 3/3] net/mlx5e: Make PCIe congestion event thresholds configurable Jakub Kicinski
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1752130292-22249-4-git-send-email-tariqt@nvidia.com \
--to=tariqt@nvidia.com \
--cc=andrew+netdev@lunn.ch \
--cc=corbet@lwn.net \
--cc=davem@davemloft.net \
--cc=dtatulea@nvidia.com \
--cc=edumazet@google.com \
--cc=gal@nvidia.com \
--cc=kuba@kernel.org \
--cc=leon@kernel.org \
--cc=linux-doc@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rdma@vger.kernel.org \
--cc=mbloch@nvidia.com \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeed@kernel.org \
--cc=saeedm@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.