* [PATCH mlx5-next 1/3] net/mlx5: Query ADV_RDMA capabilities
2025-01-02 11:36 [PATCH rdma-next 0/3] Add RDMA TRANSPORT steering domain Leon Romanovsky
@ 2025-01-02 11:36 ` Leon Romanovsky
2025-01-02 11:36 ` [PATCH mlx5-next 2/3] net/mlx5: fs, add RDMA TRANSPORT steering domain support Leon Romanovsky
2025-01-02 11:36 ` [PATCH rdma-next 3/3] RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace Leon Romanovsky
2 siblings, 0 replies; 4+ messages in thread
From: Leon Romanovsky @ 2025-01-02 11:36 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Patrisious Haddad, Andrew Lunn, Eric Dumazet, Jakub Kicinski,
linux-rdma, Mark Bloch, netdev, Paolo Abeni, Saeed Mahameed,
Tariq Toukan
From: Patrisious Haddad <phaddad@nvidia.com>
Query ADV_RDMA capabilities which provide information for
advanced RDMA related features.
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/fw.c | 7 ++++
.../net/ethernet/mellanox/mlx5/core/main.c | 1 +
include/linux/mlx5/device.h | 5 +++
include/linux/mlx5/mlx5_ifc.h | 42 ++++++++++++++++++-
4 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 76ad46bf477d6..bc3306dec7b53 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -281,6 +281,13 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, adv_rdma)) {
+ err = mlx5_core_get_caps_mode(dev, MLX5_CAP_ADV_RDMA,
+ HCA_CAP_OPMOD_GET_CUR);
+ if (err)
+ return err;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 220a9ac75c8ba..9faae519ebdd0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1788,6 +1788,7 @@ static const int types[] = {
MLX5_CAP_MACSEC,
MLX5_CAP_ADV_VIRTUALIZATION,
MLX5_CAP_CRYPTO,
+ MLX5_CAP_ADV_RDMA,
};
static void mlx5_hca_caps_free(struct mlx5_core_dev *dev)
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index cc647992f3d1e..da5bcde853da3 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1249,6 +1249,7 @@ enum mlx5_cap_type {
MLX5_CAP_GENERAL_2 = 0x20,
MLX5_CAP_PORT_SELECTION = 0x25,
MLX5_CAP_ADV_VIRTUALIZATION = 0x26,
+ MLX5_CAP_ADV_RDMA = 0x28,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1382,6 +1383,10 @@ enum mlx5_qcam_feature_groups {
MLX5_GET(adv_virtualization_cap, \
mdev->caps.hca[MLX5_CAP_ADV_VIRTUALIZATION]->cur, cap)
+#define MLX5_CAP_ADV_RDMA(mdev, cap) \
+ MLX5_GET(adv_rdma_cap, \
+ mdev->caps.hca[MLX5_CAP_ADV_RDMA]->cur, cap)
+
#define MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) \
MLX5_CAP_PORT_SELECTION(mdev, flow_table_properties_port_selection.cap)
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 48d47181c7cd1..a01abf16e495e 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1986,7 +1986,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 max_geneve_tlv_options[0x8];
u8 reserved_at_568[0x3];
u8 max_geneve_tlv_option_data_len[0x5];
- u8 reserved_at_570[0x9];
+ u8 reserved_at_570[0x1];
+ u8 adv_rdma[0x1];
+ u8 reserved_at_572[0x7];
u8 adv_virtualization[0x1];
u8 reserved_at_57a[0x6];
@@ -12992,6 +12994,44 @@ struct mlx5_ifc_load_vhca_state_out_bits {
u8 reserved_at_40[0x40];
};
+struct mlx5_ifc_adv_rdma_cap_bits {
+ u8 rdma_transport_manager[0x1];
+ u8 rdma_transport_manager_other_eswitch[0x1];
+ u8 reserved_at_2[0x1e];
+
+ u8 rcx_type[0x8];
+ u8 reserved_at_28[0x2];
+ u8 ps_entry_log_max_value[0x6];
+ u8 reserved_at_30[0x6];
+ u8 qp_max_ps_num_entry[0xa];
+
+ u8 mp_max_num_queues[0x8];
+ u8 ps_user_context_max_log_size[0x8];
+ u8 message_based_qp_and_striding_wq[0x8];
+ u8 reserved_at_58[0x8];
+
+ u8 max_receive_send_message_size_stride[0x10];
+ u8 reserved_at_70[0x10];
+
+ u8 max_receive_send_message_size_byte[0x20];
+
+ u8 reserved_at_a0[0x160];
+
+ struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_rx_flow_table_properties;
+
+ struct mlx5_ifc_flow_table_prop_layout_bits rdma_transport_tx_flow_table_properties;
+
+ struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_support_2;
+
+ struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_support_2;
+
+ struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_rx_ft_field_bitmask_support_2;
+
+ struct mlx5_ifc_flow_table_fields_supported_2_bits rdma_transport_tx_ft_field_bitmask_support_2;
+
+ u8 reserved_at_800[0x3800];
+};
+
struct mlx5_ifc_adv_virtualization_cap_bits {
u8 reserved_at_0[0x3];
u8 pg_track_log_max_num[0x5];
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH mlx5-next 2/3] net/mlx5: fs, add RDMA TRANSPORT steering domain support
2025-01-02 11:36 [PATCH rdma-next 0/3] Add RDMA TRANSPORT steering domain Leon Romanovsky
2025-01-02 11:36 ` [PATCH mlx5-next 1/3] net/mlx5: Query ADV_RDMA capabilities Leon Romanovsky
@ 2025-01-02 11:36 ` Leon Romanovsky
2025-01-02 11:36 ` [PATCH rdma-next 3/3] RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace Leon Romanovsky
2 siblings, 0 replies; 4+ messages in thread
From: Leon Romanovsky @ 2025-01-02 11:36 UTC (permalink / raw)
To: Jason Gunthorpe
Cc: Patrisious Haddad, Andrew Lunn, Eric Dumazet, Jakub Kicinski,
linux-rdma, Mark Bloch, netdev, Paolo Abeni, Saeed Mahameed,
Tariq Toukan
From: Patrisious Haddad <phaddad@nvidia.com>
Add RX and TX RDMA_TRANSPORT flow table namespace, and the ability
to create flow tables in those namespaces.
The RDMA_TRANSPORT RX and TX are per vport.
Packets will traverse through RDMA_TRANSPORT_RX after RDMA_RX and through
RDMA_TRANSPORT_TX before RDMA_TX, ensuring proper control and management.
RDMA_TRANSPORT domains are managed by the vport group manager.
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
.../mellanox/mlx5/core/esw/acl/helper.c | 2 +-
.../mellanox/mlx5/core/eswitch_offloads.c | 6 +-
.../net/ethernet/mellanox/mlx5/core/fs_cmd.c | 2 +
.../net/ethernet/mellanox/mlx5/core/fs_core.c | 178 ++++++++++++++++--
.../net/ethernet/mellanox/mlx5/core/fs_core.h | 12 +-
include/linux/mlx5/device.h | 6 +
include/linux/mlx5/fs.h | 11 +-
7 files changed, 196 insertions(+), 21 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
index d599e50af346b..3ce455c2535c4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -27,7 +27,7 @@ esw_acl_table_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, int ns,
esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
- root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport->index);
+ root_ns = mlx5_get_flow_vport_namespace(dev, ns, vport->index);
if (!root_ns) {
esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
vport_num);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d5b42b3a19fdf..fdbb79a83c900 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -2831,9 +2831,9 @@ static int esw_set_master_egress_rule(struct mlx5_core_dev *master,
if (IS_ERR(vport))
return PTR_ERR(vport);
- egress_ns = mlx5_get_flow_vport_acl_namespace(master,
- MLX5_FLOW_NAMESPACE_ESW_EGRESS,
- vport->index);
+ egress_ns = mlx5_get_flow_vport_namespace(master,
+ MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+ vport->index);
if (!egress_ns)
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 676005854dad4..86f9c0fbed928 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -1141,6 +1141,8 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_default(enum fs_flow_table_type typ
case FS_FT_RDMA_RX:
case FS_FT_RDMA_TX:
case FS_FT_PORT_SEL:
+ case FS_FT_RDMA_TRANSPORT_RX:
+ case FS_FT_RDMA_TRANSPORT_TX:
return mlx5_fs_cmd_get_fw_cmds();
default:
return mlx5_fs_cmd_get_stub_cmds();
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2eabfcc247c6a..879c36a88a26c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1449,7 +1449,7 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns,
struct mlx5_flow_table *ft;
int autogroups_max_fte;
- ft = mlx5_create_flow_table(ns, ft_attr);
+ ft = mlx5_create_vport_flow_table(ns, ft_attr, ft_attr->vport);
if (IS_ERR(ft))
return ft;
@@ -2756,9 +2756,9 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
}
EXPORT_SYMBOL(mlx5_get_flow_namespace);
-struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
- enum mlx5_flow_namespace_type type,
- int vport)
+struct mlx5_flow_namespace *
+mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type, int vport_idx)
{
struct mlx5_flow_steering *steering = dev->priv.steering;
@@ -2767,25 +2767,43 @@ struct mlx5_flow_namespace *mlx5_get_flow_vport_acl_namespace(struct mlx5_core_d
switch (type) {
case MLX5_FLOW_NAMESPACE_ESW_EGRESS:
- if (vport >= steering->esw_egress_acl_vports)
+ if (vport_idx >= steering->esw_egress_acl_vports)
return NULL;
if (steering->esw_egress_root_ns &&
- steering->esw_egress_root_ns[vport])
- return &steering->esw_egress_root_ns[vport]->ns;
+ steering->esw_egress_root_ns[vport_idx])
+ return &steering->esw_egress_root_ns[vport_idx]->ns;
else
return NULL;
case MLX5_FLOW_NAMESPACE_ESW_INGRESS:
- if (vport >= steering->esw_ingress_acl_vports)
+ if (vport_idx >= steering->esw_ingress_acl_vports)
return NULL;
if (steering->esw_ingress_root_ns &&
- steering->esw_ingress_root_ns[vport])
- return &steering->esw_ingress_root_ns[vport]->ns;
+ steering->esw_ingress_root_ns[vport_idx])
+ return &steering->esw_ingress_root_ns[vport_idx]->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ if (vport_idx >= steering->rdma_transport_rx_vports)
+ return NULL;
+ if (steering->rdma_transport_rx_root_ns &&
+ steering->rdma_transport_rx_root_ns[vport_idx])
+ return &steering->rdma_transport_rx_root_ns[vport_idx]->ns;
+ else
+ return NULL;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (vport_idx >= steering->rdma_transport_tx_vports)
+ return NULL;
+
+ if (steering->rdma_transport_tx_root_ns &&
+ steering->rdma_transport_tx_root_ns[vport_idx])
+ return &steering->rdma_transport_tx_root_ns[vport_idx]->ns;
else
return NULL;
default:
return NULL;
}
}
+EXPORT_SYMBOL(mlx5_get_flow_vport_namespace);
static struct fs_prio *_fs_create_prio(struct mlx5_flow_namespace *ns,
unsigned int prio,
@@ -3191,6 +3209,127 @@ static int init_rdma_tx_root_ns(struct mlx5_flow_steering *steering)
return err;
}
+static int
+init_rdma_transport_rx_root_ns_one(struct mlx5_flow_steering *steering,
+ int vport_idx)
+{
+ struct fs_prio *prio;
+
+ steering->rdma_transport_rx_root_ns[vport_idx] =
+ create_root_ns(steering, FS_FT_RDMA_TRANSPORT_RX);
+ if (!steering->rdma_transport_rx_root_ns[vport_idx])
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->rdma_transport_rx_root_ns[vport_idx]->ns,
+ MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int
+init_rdma_transport_tx_root_ns_one(struct mlx5_flow_steering *steering,
+ int vport_idx)
+{
+ struct fs_prio *prio;
+
+ steering->rdma_transport_tx_root_ns[vport_idx] =
+ create_root_ns(steering, FS_FT_RDMA_TRANSPORT_TX);
+ if (!steering->rdma_transport_tx_root_ns[vport_idx])
+ return -ENOMEM;
+
+ /* create 1 prio*/
+ prio = fs_create_prio(&steering->rdma_transport_tx_root_ns[vport_idx]->ns,
+ MLX5_RDMA_TRANSPORT_BYPASS_PRIO, 1);
+ return PTR_ERR_OR_ZERO(prio);
+}
+
+static int init_rdma_transport_rx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_core_dev *dev = steering->dev;
+ int total_vports;
+ int err;
+ int i;
+
+ /* In case eswitch not supported and working in legacy mode */
+ total_vports = mlx5_eswitch_get_total_vports(dev) ?: 1;
+
+ steering->rdma_transport_rx_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->rdma_transport_rx_root_ns),
+ GFP_KERNEL);
+ if (!steering->rdma_transport_rx_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < total_vports; i++) {
+ err = init_rdma_transport_rx_root_ns_one(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+ steering->rdma_transport_rx_vports = total_vports;
+ return 0;
+
+cleanup_root_ns:
+ while (i--)
+ cleanup_root_ns(steering->rdma_transport_rx_root_ns[i]);
+ kfree(steering->rdma_transport_rx_root_ns);
+ steering->rdma_transport_rx_root_ns = NULL;
+ return err;
+}
+
+static int init_rdma_transport_tx_root_ns(struct mlx5_flow_steering *steering)
+{
+ struct mlx5_core_dev *dev = steering->dev;
+ int total_vports;
+ int err;
+ int i;
+
+ /* In case eswitch not supported and working in legacy mode */
+ total_vports = mlx5_eswitch_get_total_vports(dev) ?: 1;
+
+ steering->rdma_transport_tx_root_ns =
+ kcalloc(total_vports,
+ sizeof(*steering->rdma_transport_tx_root_ns),
+ GFP_KERNEL);
+ if (!steering->rdma_transport_tx_root_ns)
+ return -ENOMEM;
+
+ for (i = 0; i < total_vports; i++) {
+ err = init_rdma_transport_tx_root_ns_one(steering, i);
+ if (err)
+ goto cleanup_root_ns;
+ }
+ steering->rdma_transport_tx_vports = total_vports;
+ return 0;
+
+cleanup_root_ns:
+ while (i--)
+ cleanup_root_ns(steering->rdma_transport_tx_root_ns[i]);
+ kfree(steering->rdma_transport_tx_root_ns);
+ steering->rdma_transport_tx_root_ns = NULL;
+ return err;
+}
+
+static void cleanup_rdma_transport_roots_ns(struct mlx5_flow_steering *steering)
+{
+ int i;
+
+ if (steering->rdma_transport_rx_root_ns) {
+ for (i = 0; i < steering->rdma_transport_rx_vports; i++)
+ cleanup_root_ns(steering->rdma_transport_rx_root_ns[i]);
+
+ kfree(steering->rdma_transport_rx_root_ns);
+ steering->rdma_transport_rx_root_ns = NULL;
+ }
+
+ if (steering->rdma_transport_tx_root_ns) {
+ for (i = 0; i < steering->rdma_transport_tx_vports; i++)
+ cleanup_root_ns(steering->rdma_transport_tx_root_ns[i]);
+
+ kfree(steering->rdma_transport_tx_root_ns);
+ steering->rdma_transport_tx_root_ns = NULL;
+ }
+}
+
/* FT and tc chains are stored in the same array so we can re-use the
* mlx5_get_fdb_sub_ns() and tc api for FT chains.
* When creating a new ns for each chain store it in the first available slot.
@@ -3607,6 +3746,7 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev)
cleanup_root_ns(steering->rdma_rx_root_ns);
cleanup_root_ns(steering->rdma_tx_root_ns);
cleanup_root_ns(steering->egress_root_ns);
+ cleanup_rdma_transport_roots_ns(steering);
devl_params_unregister(priv_to_devlink(dev), mlx5_fs_params,
ARRAY_SIZE(mlx5_fs_params));
@@ -3677,6 +3817,18 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev)
goto err;
}
+ if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(dev, ft_support)) {
+ err = init_rdma_transport_rx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
+ if (MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(dev, ft_support)) {
+ err = init_rdma_transport_tx_root_ns(steering);
+ if (err)
+ goto err;
+ }
+
return 0;
err:
@@ -3827,8 +3979,10 @@ mlx5_get_root_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type
struct mlx5_flow_namespace *ns;
if (ns_type == MLX5_FLOW_NAMESPACE_ESW_EGRESS ||
- ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS)
- ns = mlx5_get_flow_vport_acl_namespace(dev, ns_type, 0);
+ ns_type == MLX5_FLOW_NAMESPACE_ESW_INGRESS ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ ns = mlx5_get_flow_vport_namespace(dev, ns_type, 0);
else
ns = mlx5_get_flow_namespace(dev, ns_type);
if (!ns)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index bad2df0715ecc..6da60e679dcb4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -112,7 +112,9 @@ enum fs_flow_table_type {
FS_FT_PORT_SEL = 0X9,
FS_FT_FDB_RX = 0xa,
FS_FT_FDB_TX = 0xb,
- FS_FT_MAX_TYPE = FS_FT_FDB_TX,
+ FS_FT_RDMA_TRANSPORT_RX = 0xd,
+ FS_FT_RDMA_TRANSPORT_TX = 0xe,
+ FS_FT_MAX_TYPE = FS_FT_RDMA_TRANSPORT_TX,
};
enum fs_flow_table_op_mod {
@@ -154,6 +156,10 @@ struct mlx5_flow_steering {
struct mlx5_flow_root_namespace *port_sel_root_ns;
int esw_egress_acl_vports;
int esw_ingress_acl_vports;
+ struct mlx5_flow_root_namespace **rdma_transport_rx_root_ns;
+ struct mlx5_flow_root_namespace **rdma_transport_tx_root_ns;
+ int rdma_transport_rx_vports;
+ int rdma_transport_tx_vports;
};
struct fs_node {
@@ -382,7 +388,9 @@ struct mlx5_flow_root_namespace *find_root(struct fs_node *node);
(type == FS_FT_PORT_SEL) ? MLX5_CAP_FLOWTABLE_PORT_SELECTION(mdev, cap) : \
(type == FS_FT_FDB_RX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \
(type == FS_FT_FDB_TX) ? MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, cap) : \
- (BUILD_BUG_ON_ZERO(FS_FT_FDB_TX != FS_FT_MAX_TYPE))\
+ (type == FS_FT_RDMA_TRANSPORT_RX) ? MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) : \
+ (type == FS_FT_RDMA_TRANSPORT_TX) ? MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) : \
+ (BUILD_BUG_ON_ZERO(FS_FT_RDMA_TRANSPORT_TX != FS_FT_MAX_TYPE))\
)
#endif
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index da5bcde853da3..e33ea22463424 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1344,6 +1344,12 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_FLOWTABLE_RDMA_TX(mdev, cap) \
MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_rdma.cap)
+#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(mdev, cap) \
+ MLX5_CAP_ADV_RDMA(mdev, rdma_transport_rx_flow_table_properties.cap)
+
+#define MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(mdev, cap) \
+ MLX5_CAP_ADV_RDMA(mdev, rdma_transport_tx_flow_table_properties.cap)
+
#define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \
MLX5_GET(flow_table_eswitch_cap, \
mdev->caps.hca[MLX5_CAP_ESWITCH_FLOW_TABLE]->cur, cap)
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 438db888bde0d..45c60a9ae8de3 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -40,6 +40,8 @@
#define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v)
+#define MLX5_RDMA_TRANSPORT_BYPASS_PRIO 0
+
enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_NONE,
MLX5_FLOW_DESTINATION_TYPE_VPORT,
@@ -108,6 +110,8 @@ enum mlx5_flow_namespace_type {
MLX5_FLOW_NAMESPACE_RDMA_TX_IPSEC,
MLX5_FLOW_NAMESPACE_RDMA_RX_MACSEC,
MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC,
+ MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX,
+ MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX,
};
enum {
@@ -192,9 +196,9 @@ struct mlx5_flow_namespace *
mlx5_get_flow_namespace(struct mlx5_core_dev *dev,
enum mlx5_flow_namespace_type type);
struct mlx5_flow_namespace *
-mlx5_get_flow_vport_acl_namespace(struct mlx5_core_dev *dev,
- enum mlx5_flow_namespace_type type,
- int vport);
+mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ int vport_idx);
struct mlx5_flow_table_attr {
int prio;
@@ -202,6 +206,7 @@ struct mlx5_flow_table_attr {
u32 level;
u32 flags;
u16 uid;
+ u16 vport;
struct mlx5_flow_table *next_ft;
struct {
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH rdma-next 3/3] RDMA/mlx5: Expose RDMA TRANSPORT flow table types to userspace
2025-01-02 11:36 [PATCH rdma-next 0/3] Add RDMA TRANSPORT steering domain Leon Romanovsky
2025-01-02 11:36 ` [PATCH mlx5-next 1/3] net/mlx5: Query ADV_RDMA capabilities Leon Romanovsky
2025-01-02 11:36 ` [PATCH mlx5-next 2/3] net/mlx5: fs, add RDMA TRANSPORT steering domain support Leon Romanovsky
@ 2025-01-02 11:36 ` Leon Romanovsky
2 siblings, 0 replies; 4+ messages in thread
From: Leon Romanovsky @ 2025-01-02 11:36 UTC (permalink / raw)
To: Jason Gunthorpe; +Cc: Patrisious Haddad, linux-rdma, Mark Bloch
From: Patrisious Haddad <phaddad@nvidia.com>
This patch adds RDMA_TRANSPORT_RX and RDMA_TRANSPORT_TX as a new flow
table type for matcher creation.
Signed-off-by: Patrisious Haddad <phaddad@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/infiniband/hw/mlx5/fs.c | 140 ++++++++++++++++++++--
drivers/infiniband/hw/mlx5/fs.h | 2 +
drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +
include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 +
include/uapi/rdma/mlx5_user_ioctl_verbs.h | 2 +
5 files changed, 135 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 520034acf73aa..2d1430673238e 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -690,7 +690,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
struct mlx5_ib_flow_prio *prio,
int priority,
int num_entries, int num_groups,
- u32 flags)
+ u32 flags, u16 vport)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_table *ft;
@@ -698,6 +698,7 @@ static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_ib_dev *dev,
ft_attr.prio = priority;
ft_attr.max_fte = num_entries;
ft_attr.flags = flags;
+ ft_attr.vport = vport;
ft_attr.autogroup.max_num_groups = num_groups;
ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ft))
@@ -792,7 +793,7 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
ft = prio->flow_table;
if (!ft)
return _get_prio(dev, ns, prio, priority, max_table_size,
- num_groups, flags);
+ num_groups, flags, 0);
return prio;
}
@@ -935,7 +936,7 @@ int mlx5_ib_fs_add_op_fc(struct mlx5_ib_dev *dev, u32 port_num,
prio = &dev->flow_db->opfcs[type];
if (!prio->flow_table) {
prio = _get_prio(dev, ns, prio, priority,
- dev->num_ports * MAX_OPFC_RULES, 1, 0);
+ dev->num_ports * MAX_OPFC_RULES, 1, 0, 0);
if (IS_ERR(prio)) {
err = PTR_ERR(prio);
goto free;
@@ -1413,17 +1414,51 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
return ERR_PTR(err);
}
+static int mlx5_ib_fill_transport_ns_info(struct mlx5_ib_dev *dev,
+ enum mlx5_flow_namespace_type type,
+ u32 *flags, u16 *vport_idx,
+ u16 *vport,
+ struct mlx5_core_dev **ft_mdev,
+ u32 ib_port)
+{
+ struct mlx5_core_dev *esw_mdev;
+
+ if (!is_mdev_switchdev_mode(dev->mdev))
+ return 0;
+
+ if (!MLX5_CAP_ADV_RDMA(dev->mdev, rdma_transport_manager))
+ return -EOPNOTSUPP;
+
+ if (!dev->port[ib_port - 1].rep)
+ return -EINVAL;
+
+ esw_mdev = mlx5_eswitch_get_core_dev(dev->port[ib_port - 1].rep->esw);
+ if (esw_mdev != dev->mdev)
+ return -EOPNOTSUPP;
+
+ *flags |= MLX5_FLOW_TABLE_OTHER_VPORT;
+ *ft_mdev = esw_mdev;
+ *vport = dev->port[ib_port - 1].rep->vport;
+ *vport_idx = dev->port[ib_port - 1].rep->vport_index;
+
+ return 0;
+}
+
static struct mlx5_ib_flow_prio *
_get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
enum mlx5_flow_namespace_type ns_type,
- bool mcast)
+ bool mcast, u32 ib_port)
{
+ struct mlx5_core_dev *ft_mdev = dev->mdev;
struct mlx5_flow_namespace *ns = NULL;
struct mlx5_ib_flow_prio *prio = NULL;
int max_table_size = 0;
+ u16 vport_idx = 0;
bool esw_encap;
u32 flags = 0;
+ u16 vport = 0;
int priority;
+ int ret;
if (mcast)
priority = MLX5_IB_FLOW_MCAST_PRIO;
@@ -1471,13 +1506,38 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, log_max_ft_size));
priority = user_priority;
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ if (ib_port == 0 || user_priority > MLX5_RDMA_TRANSPORT_BYPASS_PRIO)
+ return ERR_PTR(-EINVAL);
+ ret = mlx5_ib_fill_transport_ns_info(dev, ns_type, &flags,
+ &vport_idx, &vport,
+ &ft_mdev, ib_port);
+ if (ret)
+ return ERR_PTR(ret);
+
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX)
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(
+ ft_mdev, log_max_ft_size));
+ else
+ max_table_size =
+ BIT(MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(
+ ft_mdev, log_max_ft_size));
+ priority = user_priority;
+ break;
default:
break;
}
max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES);
- ns = mlx5_get_flow_namespace(dev->mdev, ns_type);
+ if (ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX ||
+ ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX)
+ ns = mlx5_get_flow_vport_namespace(ft_mdev, ns_type, vport_idx);
+ else
+ ns = mlx5_get_flow_namespace(ft_mdev, ns_type);
+
if (!ns)
return ERR_PTR(-EOPNOTSUPP);
@@ -1497,6 +1557,12 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
case MLX5_FLOW_NAMESPACE_RDMA_TX:
prio = &dev->flow_db->rdma_tx[priority];
break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX:
+ prio = &dev->flow_db->rdma_transport_rx[ib_port - 1];
+ break;
+ case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX:
+ prio = &dev->flow_db->rdma_transport_tx[ib_port - 1];
+ break;
default: return ERR_PTR(-EINVAL);
}
@@ -1507,7 +1573,7 @@ _get_flow_table(struct mlx5_ib_dev *dev, u16 user_priority,
return prio;
return _get_prio(dev, ns, prio, priority, max_table_size,
- MLX5_FS_MAX_TYPES, flags);
+ MLX5_FS_MAX_TYPES, flags, vport);
}
static struct mlx5_ib_flow_handler *
@@ -1626,7 +1692,8 @@ static struct mlx5_ib_flow_handler *raw_fs_rule_add(
mutex_lock(&dev->flow_db->lock);
ft_prio = _get_flow_table(dev, fs_matcher->priority,
- fs_matcher->ns_type, mcast);
+ fs_matcher->ns_type, mcast,
+ fs_matcher->ib_port);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto unlock;
@@ -1738,6 +1805,12 @@ mlx5_ib_ft_type_to_namespace(enum mlx5_ib_uapi_flow_table_type table_type,
case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX:
*namespace = MLX5_FLOW_NAMESPACE_RDMA_TX;
break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX;
+ break;
+ case MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX:
+ *namespace = MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX;
+ break;
default:
return -EINVAL;
}
@@ -1827,7 +1900,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only DEVX object or QP as dest when inserting to RDMA_RX */
- if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
((!dest_devx && !dest_qp) || (dest_devx && dest_qp)))
return -EINVAL;
@@ -1844,7 +1918,8 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
return -EINVAL;
/* Allow only flow table as dest when inserting to FDB or RDMA_RX */
if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB_BYPASS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX) &&
*dest_type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
return -EINVAL;
} else if (dest_qp) {
@@ -1865,14 +1940,16 @@ static int get_dests(struct uverbs_attr_bundle *attrs,
*dest_id = mqp->raw_packet_qp.rq.tirn;
*dest_type = MLX5_FLOW_DESTINATION_TYPE_TIR;
} else if ((fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) &&
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) &&
!(*flags & MLX5_IB_ATTR_CREATE_FLOW_FLAGS_DROP)) {
*dest_type = MLX5_FLOW_DESTINATION_TYPE_PORT;
}
if (*dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
(fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS ||
- fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX))
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX ||
+ fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX))
return -EINVAL;
return 0;
@@ -2386,6 +2463,22 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_MATCHER_CREATE)(
goto end;
}
+ if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT)) {
+ err = uverbs_copy_from(&obj->ib_port, attrs,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT);
+ if (err)
+ goto end;
+ if (!rdma_is_port_valid(&dev->ib_dev, obj->ib_port)) {
+ err = -EINVAL;
+ goto end;
+ }
+ if (obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX &&
+ obj->ns_type != MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_TX) {
+ err = -EINVAL;
+ goto end;
+ }
+ }
+
uobj->object = obj;
obj->mdev = dev->mdev;
atomic_set(&obj->usecnt, 0);
@@ -2433,7 +2526,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_STEERING_ANCHOR_CREATE)(
mutex_lock(&dev->flow_db->lock);
- ft_prio = _get_flow_table(dev, priority, ns_type, 0);
+ ft_prio = _get_flow_table(dev, priority, ns_type, 0, 0);
if (IS_ERR(ft_prio)) {
err = PTR_ERR(ft_prio);
goto free_obj;
@@ -2819,7 +2912,10 @@ DECLARE_UVERBS_NAMED_METHOD(
UA_OPTIONAL),
UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
enum mlx5_ib_uapi_flow_table_type,
- UA_OPTIONAL));
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_OPTIONAL));
DECLARE_UVERBS_NAMED_METHOD_DESTROY(
MLX5_IB_METHOD_FLOW_MATCHER_DESTROY,
@@ -2889,8 +2985,26 @@ int mlx5_ib_fs_init(struct mlx5_ib_dev *dev)
if (!dev->flow_db)
return -ENOMEM;
+ dev->flow_db->rdma_transport_rx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_rx)
+ goto free_flow_db;
+
+ dev->flow_db->rdma_transport_tx = kcalloc(dev->num_ports,
+ sizeof(struct mlx5_ib_flow_prio),
+ GFP_KERNEL);
+ if (!dev->flow_db->rdma_transport_tx)
+ goto free_rdma_transport_rx;
+
mutex_init(&dev->flow_db->lock);
ib_set_device_ops(&dev->ib_dev, &flow_ops);
return 0;
+
+free_rdma_transport_rx:
+ kfree(dev->flow_db->rdma_transport_rx);
+free_flow_db:
+ kfree(dev->flow_db);
+ return -ENOMEM;
}
diff --git a/drivers/infiniband/hw/mlx5/fs.h b/drivers/infiniband/hw/mlx5/fs.h
index b9734904f5f01..0516555eb1c17 100644
--- a/drivers/infiniband/hw/mlx5/fs.h
+++ b/drivers/infiniband/hw/mlx5/fs.h
@@ -40,6 +40,8 @@ static inline void mlx5_ib_fs_cleanup(struct mlx5_ib_dev *dev)
* is a safe assumption that all references are gone.
*/
mlx5_ib_fs_cleanup_anchor(dev);
+ kfree(dev->flow_db->rdma_transport_tx);
+ kfree(dev->flow_db->rdma_transport_rx);
kfree(dev->flow_db);
}
#endif /* _MLX5_IB_FS_H */
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 6b6e8ca2f9070..a0138bdfa3894 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -276,6 +276,7 @@ struct mlx5_ib_flow_matcher {
struct mlx5_core_dev *mdev;
atomic_t usecnt;
u8 match_criteria_enable;
+ u32 ib_port;
};
struct mlx5_ib_steering_anchor {
@@ -307,6 +308,8 @@ struct mlx5_ib_flow_db {
struct mlx5_ib_flow_prio rdma_tx[MLX5_IB_NUM_FLOW_FT];
struct mlx5_ib_flow_prio opfcs[MLX5_IB_OPCOUNTER_MAX];
struct mlx5_flow_table *lag_demux_ft;
+ struct mlx5_ib_flow_prio *rdma_transport_rx;
+ struct mlx5_ib_flow_prio *rdma_transport_tx;
/* Protect flow steering bypass flow tables
* when add/del flow rules.
* only single add/removal of flow steering rule could be done
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index fd2e4a3a56b36..18f9fe0702132 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -239,6 +239,7 @@ enum mlx5_ib_flow_matcher_create_attrs {
MLX5_IB_ATTR_FLOW_MATCHER_MATCH_CRITERIA,
MLX5_IB_ATTR_FLOW_MATCHER_FLOW_FLAGS,
MLX5_IB_ATTR_FLOW_MATCHER_FT_TYPE,
+ MLX5_IB_ATTR_FLOW_MATCHER_IB_PORT,
};
enum mlx5_ib_flow_matcher_destroy_attrs {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index 7c233df475e71..8f86e79d78a5f 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -45,6 +45,8 @@ enum mlx5_ib_uapi_flow_table_type {
MLX5_IB_UAPI_FLOW_TABLE_TYPE_FDB = 0x2,
MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_RX = 0x3,
MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TX = 0x4,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_RX = 0x5,
+ MLX5_IB_UAPI_FLOW_TABLE_TYPE_RDMA_TRANSPORT_TX = 0x6,
};
enum mlx5_ib_uapi_flow_action_packet_reformat_type {
--
2.47.1
^ permalink raw reply related [flat|nested] 4+ messages in thread