From: Saeed Mahameed <saeed@kernel.org>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
Eric Dumazet <edumazet@google.com>
Cc: Saeed Mahameed <saeedm@nvidia.com>,
netdev@vger.kernel.org, Tariq Toukan <tariqt@nvidia.com>
Subject: [net-next 06/15] net/mlx5: SD, Implement steering for primary and secondaries
Date: Wed, 20 Dec 2023 16:57:12 -0800 [thread overview]
Message-ID: <20231221005721.186607-7-saeed@kernel.org> (raw)
In-Reply-To: <20231221005721.186607-1-saeed@kernel.org>
From: Tariq Toukan <tariqt@nvidia.com>
Implement the needed SD steering adjustments for the primary and
secondaries.
While the multiple devices of an SD group are used to avoid cross-NUMA
memory access, at the chip level all traffic goes only through the
primary device.
The secondaries are forced to silent mode, to guarantee they are not
involved in any unexpected ingress/egress traffic.
In RX, secondary devices will not have steering objects. Traffic will be
steered from the primary device to the RQs of a secondary device using
advanced cross-vhca RX steering capabilities.
In TX, the primary creates a new TX flow table. Each secondary creates
an alias of that table and sets the alias as its own TX root flow table.
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
.../net/ethernet/mellanox/mlx5/core/lib/sd.c | 185 +++++++++++++++++-
1 file changed, 184 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
index 19e674dd1af7..3309f21d892e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c
@@ -4,6 +4,7 @@
#include "lib/sd.h"
#include "mlx5_core.h"
#include "lib/mlx5.h"
+#include "fs_cmd.h"
#include <linux/mlx5/vport.h>
#define sd_info(__dev, format, ...) \
@@ -19,9 +20,11 @@ struct mlx5_sd {
union {
struct { /* primary */
struct mlx5_core_dev *secondaries[MLX5_SD_MAX_GROUP_SZ - 1];
+ struct mlx5_flow_table *tx_ft;
};
struct { /* secondary */
struct mlx5_core_dev *primary_dev;
+ u32 alias_obj_id;
};
};
};
@@ -78,6 +81,21 @@ struct mlx5_core_dev *mlx5_sd_ch_ix_get_dev(struct mlx5_core_dev *primary, int c
return mlx5_sd_primary_get_peer(primary, mdev_idx);
}
+static bool ft_create_alias_supported(struct mlx5_core_dev *dev)
+{
+ u64 obj_allowed = MLX5_CAP_GEN_2_64(dev, allowed_object_for_other_vhca_access);
+ u32 obj_supp = MLX5_CAP_GEN_2(dev, cross_vhca_object_to_object_supported);
+
+ if (!(obj_supp &
+ MLX5_CROSS_VHCA_OBJ_TO_OBJ_SUPPORTED_LOCAL_FLOW_TABLE_ROOT_TO_REMOTE_FLOW_TABLE))
+ return false;
+
+ if (!(obj_allowed & MLX5_ALLOWED_OBJ_FOR_OTHER_VHCA_ACCESS_FLOW_TABLE))
+ return false;
+
+ return true;
+}
+
static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
{
/* Feature is currently implemented for PFs only */
@@ -88,6 +106,24 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses)
if (host_buses > MLX5_SD_MAX_GROUP_SZ)
return false;
+ /* Disconnect secondaries from the network */
+ if (!MLX5_CAP_GEN(dev, eswitch_manager))
+ return false;
+ if (!MLX5_CAP_GEN(dev, silent_mode))
+ return false;
+
+ /* RX steering from primary to secondaries */
+ if (!MLX5_CAP_GEN(dev, cross_vhca_rqt))
+ return false;
+ if (host_buses > MLX5_CAP_GEN_2(dev, max_rqt_vhca_id))
+ return false;
+
+ /* TX steering from secondaries to primary */
+ if (!ft_create_alias_supported(dev))
+ return false;
+ if (!MLX5_CAP_FLOWTABLE_NIC_TX(dev, reset_root_to_default))
+ return false;
+
return true;
}
@@ -227,10 +263,122 @@ static void sd_unregister(struct mlx5_core_dev *dev)
mlx5_devcom_unregister_component(sd->devcom);
}
+static int sd_cmd_set_primary(struct mlx5_core_dev *primary, u8 *alias_key)
+{
+ struct mlx5_cmd_allow_other_vhca_access_attr allow_attr = {};
+ struct mlx5_sd *sd = mlx5_get_sd(primary);
+ struct mlx5_flow_table_attr ft_attr = {};
+ struct mlx5_flow_namespace *nic_ns;
+ struct mlx5_flow_table *ft;
+ int err;
+
+ nic_ns = mlx5_get_flow_namespace(primary, MLX5_FLOW_NAMESPACE_EGRESS);
+ if (!nic_ns)
+ return -EOPNOTSUPP;
+
+ ft = mlx5_create_flow_table(nic_ns, &ft_attr);
+ if (IS_ERR(ft)) {
+ err = PTR_ERR(ft);
+ return err;
+ }
+ sd->tx_ft = ft;
+ memcpy(allow_attr.access_key, alias_key, ACCESS_KEY_LEN);
+ allow_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS;
+ allow_attr.obj_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id;
+
+ err = mlx5_cmd_allow_other_vhca_access(primary, &allow_attr);
+ if (err) {
+ mlx5_core_err(primary, "Failed to allow other vhca access err=%d\n",
+ err);
+ mlx5_destroy_flow_table(ft);
+ return err;
+ }
+
+ return 0;
+}
+
+static void sd_cmd_unset_primary(struct mlx5_core_dev *primary)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(primary);
+
+ mlx5_destroy_flow_table(sd->tx_ft);
+}
+
+static int sd_secondary_create_alias_ft(struct mlx5_core_dev *secondary,
+ struct mlx5_core_dev *primary,
+ struct mlx5_flow_table *ft,
+ u32 *obj_id, u8 *alias_key)
+{
+ u32 aliased_object_id = (ft->type << FT_ID_FT_TYPE_OFFSET) | ft->id;
+ u16 vhca_id_to_be_accessed = MLX5_CAP_GEN(primary, vhca_id);
+ struct mlx5_cmd_alias_obj_create_attr alias_attr = {};
+ int ret;
+
+ memcpy(alias_attr.access_key, alias_key, ACCESS_KEY_LEN);
+ alias_attr.obj_id = aliased_object_id;
+ alias_attr.obj_type = MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS;
+ alias_attr.vhca_id = vhca_id_to_be_accessed;
+ ret = mlx5_cmd_alias_obj_create(secondary, &alias_attr, obj_id);
+ if (ret) {
+ mlx5_core_err(secondary, "Failed to create alias object err=%d\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void sd_secondary_destroy_alias_ft(struct mlx5_core_dev *secondary)
+{
+ struct mlx5_sd *sd = mlx5_get_sd(secondary);
+
+ mlx5_cmd_alias_obj_destroy(secondary, sd->alias_obj_id,
+ MLX5_GENERAL_OBJECT_TYPES_FLOW_TABLE_ALIAS);
+}
+
+static int sd_cmd_set_secondary(struct mlx5_core_dev *secondary,
+ struct mlx5_core_dev *primary,
+ u8 *alias_key)
+{
+ struct mlx5_sd *primary_sd = mlx5_get_sd(primary);
+ struct mlx5_sd *sd = mlx5_get_sd(secondary);
+ int err;
+
+ err = mlx5_fs_cmd_set_l2table_entry_silent(secondary, 1);
+ if (err)
+ return err;
+
+ err = sd_secondary_create_alias_ft(secondary, primary, primary_sd->tx_ft,
+ &sd->alias_obj_id, alias_key);
+ if (err)
+ goto err_unset_silent;
+
+ err = mlx5_fs_cmd_set_tx_flow_table_root(secondary, sd->alias_obj_id, false);
+ if (err)
+ goto err_destroy_alias_ft;
+
+ return 0;
+
+err_destroy_alias_ft:
+ sd_secondary_destroy_alias_ft(secondary);
+err_unset_silent:
+ mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
+ return err;
+}
+
+static void sd_cmd_unset_secondary(struct mlx5_core_dev *secondary)
+{
+ mlx5_fs_cmd_set_tx_flow_table_root(secondary, 0, true);
+ sd_secondary_destroy_alias_ft(secondary);
+ mlx5_fs_cmd_set_l2table_entry_silent(secondary, 0);
+}
+
int mlx5_sd_init(struct mlx5_core_dev *dev)
{
+ struct mlx5_core_dev *primary, *pos, *to;
struct mlx5_sd *sd = mlx5_get_sd(dev);
- int err;
+ u8 alias_key[ACCESS_KEY_LEN];
+ int err, i;
err = sd_init(dev);
if (err)
@@ -244,8 +392,33 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
if (err)
goto err_sd_cleanup;
+ if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ return 0;
+
+ primary = mlx5_sd_get_primary(dev);
+
+ for (i = 0; i < ACCESS_KEY_LEN; i++)
+ alias_key[i] = get_random_u8();
+
+ err = sd_cmd_set_primary(primary, alias_key);
+ if (err)
+ goto err_sd_unregister;
+
+ mlx5_sd_for_each_secondary(i, primary, pos) {
+ err = sd_cmd_set_secondary(pos, primary, alias_key);
+ if (err)
+ goto err_unset_secondaries;
+ }
+
return 0;
+err_unset_secondaries:
+ to = pos;
+ mlx5_sd_for_each_secondary_to(i, primary, to, pos)
+ sd_cmd_unset_secondary(pos);
+ sd_cmd_unset_primary(primary);
+err_sd_unregister:
+ sd_unregister(dev);
err_sd_cleanup:
sd_cleanup(dev);
return err;
@@ -254,10 +427,20 @@ int mlx5_sd_init(struct mlx5_core_dev *dev)
void mlx5_sd_cleanup(struct mlx5_core_dev *dev)
{
struct mlx5_sd *sd = mlx5_get_sd(dev);
+ struct mlx5_core_dev *primary, *pos;
+ int i;
if (!sd)
return;
+ if (!mlx5_devcom_comp_is_ready(sd->devcom))
+ goto out;
+
+ primary = mlx5_sd_get_primary(dev);
+ mlx5_sd_for_each_secondary(i, primary, pos)
+ sd_cmd_unset_secondary(pos);
+ sd_cmd_unset_primary(primary);
+out:
sd_unregister(dev);
sd_cleanup(dev);
}
--
2.43.0
next prev parent reply other threads:[~2023-12-21 0:57 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-21 0:57 [pull request][net-next 00/15] mlx5 updates 2023-12-20 Saeed Mahameed
2023-12-21 0:57 ` [net-next 01/15] net/mlx5e: Use the correct lag ports number when creating TISes Saeed Mahameed
2023-12-29 22:40 ` patchwork-bot+netdevbpf
2023-12-21 0:57 ` [net-next 02/15] net/mlx5: Fix query of sd_group field Saeed Mahameed
2023-12-21 0:57 ` [net-next 03/15] net/mlx5: SD, Introduce SD lib Saeed Mahameed
2023-12-21 0:57 ` [net-next 04/15] net/mlx5: SD, Implement basic query and instantiation Saeed Mahameed
2024-01-05 12:15 ` Jiri Pirko
2024-01-25 7:34 ` Tariq Toukan
2024-01-29 9:21 ` Jiri Pirko
2023-12-21 0:57 ` [net-next 05/15] net/mlx5: SD, Implement devcom communication and primary election Saeed Mahameed
2023-12-21 0:57 ` Saeed Mahameed [this message]
2023-12-21 0:57 ` [net-next 07/15] net/mlx5: SD, Add informative prints in kernel log Saeed Mahameed
2024-01-05 12:12 ` Jiri Pirko
2024-01-25 7:42 ` Tariq Toukan
2024-01-29 9:20 ` Jiri Pirko
2023-12-21 0:57 ` [net-next 08/15] net/mlx5e: Create single netdev per SD group Saeed Mahameed
2024-01-08 13:36 ` Aishwarya TCV
2024-01-08 13:50 ` Gal Pressman
2024-01-08 15:54 ` Mark Brown
2024-01-08 16:00 ` Gal Pressman
2023-12-21 0:57 ` [net-next 09/15] net/mlx5e: Create EN core HW resources for all secondary devices Saeed Mahameed
2023-12-21 0:57 ` [net-next 10/15] net/mlx5e: Let channels be SD-aware Saeed Mahameed
2024-01-04 22:50 ` Jakub Kicinski
2024-01-08 12:30 ` Gal Pressman
2024-01-09 3:08 ` Jakub Kicinski
2024-01-09 14:15 ` Gal Pressman
2024-01-09 16:00 ` Jakub Kicinski
2024-01-10 14:09 ` Gal Pressman
2024-01-25 8:01 ` Tariq Toukan
2024-01-26 2:40 ` Jakub Kicinski
2023-12-21 0:57 ` [net-next 11/15] net/mlx5e: Support cross-vhca RSS Saeed Mahameed
2023-12-21 0:57 ` [net-next 12/15] net/mlx5e: Support per-mdev queue counter Saeed Mahameed
2023-12-21 0:57 ` [net-next 13/15] net/mlx5e: Block TLS device offload on combined SD netdev Saeed Mahameed
2023-12-21 0:57 ` [net-next 14/15] net/mlx5: Enable SD feature Saeed Mahameed
2023-12-21 0:57 ` [net-next 15/15] net/mlx5: Implement management PF Ethernet profile Saeed Mahameed
2023-12-21 2:45 ` Nelson, Shannon
2023-12-21 22:25 ` Saeed Mahameed
2024-01-04 22:44 ` Jakub Kicinski
2024-01-08 23:22 ` Saeed Mahameed
2024-01-09 2:58 ` Jakub Kicinski
2024-01-17 7:37 ` Saeed Mahameed
2024-01-18 2:04 ` Jakub Kicinski
2024-01-04 22:47 ` [pull request][net-next 00/15] mlx5 updates 2023-12-20 Jakub Kicinski
2024-01-08 1:19 ` Jakub Kicinski
2024-01-08 23:14 ` Saeed Mahameed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231221005721.186607-7-saeed@kernel.org \
--to=saeed@kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=saeedm@nvidia.com \
--cc=tariqt@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).