From: Aurelien Aptel <aaptel@nvidia.com>
To: linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
sagi@grimberg.me, hch@lst.de, kbusch@kernel.org, axboe@fb.com,
chaitanyak@nvidia.com, davem@davemloft.net, kuba@kernel.org
Cc: aaptel@nvidia.com, aurelien.aptel@gmail.com, smalin@nvidia.com,
malin1024@gmail.com, ogerlitz@nvidia.com, yorayz@nvidia.com,
borisp@nvidia.com, galshalom@nvidia.com, mgurtovoy@nvidia.com,
tariqt@nvidia.com, gus@collabora.com
Subject: [PATCH v28 12/20] net/mlx5: Add NVMEoTCP caps, HW bits, 128B CQE and enumerations
Date: Wed, 30 Apr 2025 08:57:33 +0000
Message-ID: <20250430085741.5108-13-aaptel@nvidia.com>
In-Reply-To: <20250430085741.5108-1-aaptel@nvidia.com>
From: Ben Ben-Ishay <benishay@nvidia.com>
Add the necessary infrastructure for NVMEoTCP offload:
- Create the mlx5e_cqe128 structure for NVMEoTCP offload.
  The new structure consists of the regular mlx5_cqe64 plus
  the NVMEoTCP data for offloaded packets.
- Add NVMEoTCP fields to mlx5_cqe64; these fields define the
  type of data that the additional NVMEoTCP part represents.
- Add the nvmeotcp_zero_copy_en and nvmeotcp_crc_en bits to
  the TIR, to identify an NVMEoTCP offload flow, and a
  tag_buffer_id that will be used by the connected
  nvmeotcp_queues.
- Add a new capability to HCA_CAP that represents the
  NVMEoTCP offload ability.
Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Signed-off-by: Or Gerlitz <ogerlitz@nvidia.com>
Signed-off-by: Aurelien Aptel <aaptel@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
---
drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 ++
include/linux/mlx5/device.h | 51 ++++++++++++-
include/linux/mlx5/mlx5_ifc.h | 77 +++++++++++++++++++-
include/linux/mlx5/qp.h | 1 +
4 files changed, 130 insertions(+), 5 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 57476487e31f..a1b437b91c4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -294,6 +294,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, nvmeotcp)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_NVMEOTCP);
+ if (err)
+ return err;
+ }
+
return 0;
}
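(Not part of the patch: a minimal sketch of how a feature-init path could
gate on these caps once they are queried; nvmeotcp_supported() is a
hypothetical helper, not something this series adds.)

    /* Sketch only -- nvmeotcp_supported() is a hypothetical helper. */
    static bool nvmeotcp_supported(struct mlx5_core_dev *mdev)
    {
            return MLX5_CAP_GEN(mdev, nvmeotcp) &&
                   MLX5_CAP_DEV_NVMEOTCP(mdev, zerocopy) &&
                   MLX5_CAP_DEV_NVMEOTCP(mdev, crc_rx);
    }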
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index b071df6e4e53..4ec55b8881a9 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -265,6 +265,7 @@ enum {
enum {
MLX5_MKEY_MASK_LEN = 1ull << 0,
MLX5_MKEY_MASK_PAGE_SIZE = 1ull << 1,
+ MLX5_MKEY_MASK_XLT_OCT_SIZE = 1ull << 2,
MLX5_MKEY_MASK_START_ADDR = 1ull << 6,
MLX5_MKEY_MASK_PD = 1ull << 7,
MLX5_MKEY_MASK_EN_RINVAL = 1ull << 8,
@@ -821,7 +822,11 @@ struct mlx5_err_cqe {
struct mlx5_cqe64 {
u8 tls_outer_l3_tunneled;
- u8 rsvd0;
+ u8 rsvd16bit:4;
+ u8 nvmeotcp_zc:1;
+ u8 nvmeotcp_ddgst:1;
+ u8 nvmeotcp_resync:1;
+ u8 rsvd23bit:1;
__be16 wqe_id;
union {
struct {
@@ -870,6 +875,19 @@ struct mlx5_cqe64 {
u8 op_own;
};
+struct mlx5e_cqe128 {
+ __be16 cclen;
+ __be16 hlen;
+ union {
+ __be32 resync_tcp_sn;
+ __be32 ccoff;
+ };
+ __be16 ccid;
+ __be16 rsvd8;
+ u8 rsvd12[52];
+ struct mlx5_cqe64 cqe64;
+};
+
struct mlx5_mini_cqe8 {
union {
__be32 rx_hash_result;
@@ -905,6 +923,28 @@ enum {
#define MLX5_MINI_CQE_ARRAY_SIZE 8
+static inline bool cqe_is_nvmeotcp_resync(struct mlx5_cqe64 *cqe)
+{
+ return cqe->nvmeotcp_resync;
+}
+
+static inline bool cqe_is_nvmeotcp_crcvalid(struct mlx5_cqe64 *cqe)
+{
+ return cqe->nvmeotcp_ddgst;
+}
+
+static inline bool cqe_is_nvmeotcp_zc(struct mlx5_cqe64 *cqe)
+{
+ return cqe->nvmeotcp_zc;
+}
+
+/* check if cqe is zc or crc or resync */
+static inline bool cqe_is_nvmeotcp(struct mlx5_cqe64 *cqe)
+{
+ return cqe_is_nvmeotcp_zc(cqe) || cqe_is_nvmeotcp_crcvalid(cqe) ||
+ cqe_is_nvmeotcp_resync(cqe);
+}
+
static inline u8 mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
{
return (cqe->op_own >> 2) & 0x3;
@@ -1245,6 +1285,7 @@ enum mlx5_cap_type {
MLX5_CAP_VDPA_EMULATION = 0x13,
MLX5_CAP_DEV_EVENT = 0x14,
MLX5_CAP_IPSEC,
+ MLX5_CAP_DEV_NVMEOTCP = 0x19,
MLX5_CAP_CRYPTO = 0x1a,
MLX5_CAP_SHAMPO = 0x1d,
MLX5_CAP_MACSEC = 0x1f,
@@ -1486,6 +1527,14 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_SHAMPO(mdev, cap) \
MLX5_GET(shampo_cap, mdev->caps.hca[MLX5_CAP_SHAMPO]->cur, cap)
+#define MLX5_CAP_DEV_NVMEOTCP(mdev, cap)\
+ MLX5_GET(nvmeotcp_cap, \
+ (mdev)->caps.hca[MLX5_CAP_DEV_NVMEOTCP]->cur, cap)
+
+#define MLX5_CAP64_DEV_NVMEOTCP(mdev, cap)\
+ MLX5_GET64(nvmeotcp_cap, \
+ (mdev)->caps.hca[MLX5_CAP_DEV_NVMEOTCP]->cur, cap)
+
enum {
MLX5_CMD_STAT_OK = 0x0,
MLX5_CMD_STAT_INT_ERR = 0x1,
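(Not part of the patch: a sketch of how an RX completion handler might
consume the new CQE helpers; rx_handle_cqe() and handle_resync() are
illustrative names only.)

    /* Sketch only -- handler and handle_resync() are illustrative. */
    static void rx_handle_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
    {
            if (cqe_is_nvmeotcp(cqe)) {
                    /* cqe64 is the last member of mlx5e_cqe128, so the
                     * full 128B CQE can be recovered from the 64B pointer.
                     */
                    struct mlx5e_cqe128 *cqe128 =
                            container_of(cqe, struct mlx5e_cqe128, cqe64);

                    if (cqe_is_nvmeotcp_resync(cqe))
                            handle_resync(rq,
                                          be32_to_cpu(cqe128->resync_tcp_sn));
            }
    }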
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 0e348b2065a8..c4f957e5fe94 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1598,6 +1598,20 @@ enum {
MLX5_STEERING_FORMAT_CONNECTX_8 = 3,
};
+struct mlx5_ifc_nvmeotcp_cap_bits {
+ u8 zerocopy[0x1];
+ u8 crc_rx[0x1];
+ u8 crc_tx[0x1];
+ u8 reserved_at_3[0x15];
+ u8 version[0x8];
+
+ u8 reserved_at_20[0x13];
+ u8 log_max_nvmeotcp_tag_buffer_table[0x5];
+ u8 reserved_at_38[0x3];
+ u8 log_max_nvmeotcp_tag_buffer_size[0x5];
+ u8 reserved_at_40[0x7c0];
+};
+
struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x6];
u8 page_request_disable[0x1];
@@ -1625,7 +1639,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 event_cap[0x1];
u8 reserved_at_91[0x2];
u8 isolate_vl_tc_new[0x1];
- u8 reserved_at_94[0x4];
+ u8 reserved_at_94[0x2];
+ u8 nvmeotcp[0x1];
+ u8 reserved_at_97[0x1];
u8 prio_tag_required[0x1];
u8 reserved_at_99[0x2];
u8 log_max_qp[0x5];
@@ -3772,6 +3788,7 @@ union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_macsec_cap_bits macsec_cap;
struct mlx5_ifc_crypto_cap_bits crypto_cap;
struct mlx5_ifc_ipsec_cap_bits ipsec_cap;
+ struct mlx5_ifc_nvmeotcp_cap_bits nvmeotcp_cap;
u8 reserved_at_0[0x8000];
};
@@ -4024,7 +4041,9 @@ struct mlx5_ifc_tirc_bits {
u8 disp_type[0x4];
u8 tls_en[0x1];
- u8 reserved_at_25[0x1b];
+ u8 nvmeotcp_zero_copy_en[0x1];
+ u8 nvmeotcp_crc_en[0x1];
+ u8 reserved_at_27[0x19];
u8 reserved_at_40[0x40];
@@ -4055,7 +4074,8 @@ struct mlx5_ifc_tirc_bits {
struct mlx5_ifc_rx_hash_field_select_bits rx_hash_field_selector_inner;
- u8 reserved_at_2c0[0x4c0];
+ u8 nvmeotcp_tag_buffer_table_id[0x20];
+ u8 reserved_at_2e0[0x4a0];
};
enum {
@@ -12505,6 +12525,8 @@ enum {
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT_ULL(0xc),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT_ULL(0x13),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER = BIT_ULL(0x20),
+ MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_NVMEOTCP_TAG_BUFFER_TABLE =
+ BIT_ULL(0x21),
MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = BIT_ULL(0x24),
};
@@ -12516,6 +12538,7 @@ enum {
MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc,
MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13,
MLX5_GENERAL_OBJECT_TYPES_SAMPLER = 0x20,
+ MLX5_GENERAL_OBJECT_TYPES_NVMEOTCP_TAG_BUFFER_TABLE = 0x21,
MLX5_GENERAL_OBJECT_TYPES_FLOW_METER_ASO = 0x24,
MLX5_GENERAL_OBJECT_TYPES_MACSEC = 0x27,
MLX5_GENERAL_OBJECT_TYPES_INT_KEK = 0x47,
@@ -12890,6 +12913,21 @@ struct mlx5_ifc_query_sampler_obj_out_bits {
struct mlx5_ifc_sampler_obj_bits sampler_object;
};
+struct mlx5_ifc_nvmeotcp_tag_buf_table_obj_bits {
+ u8 modify_field_select[0x40];
+
+ u8 reserved_at_40[0x20];
+
+ u8 reserved_at_60[0x1b];
+ u8 log_tag_buffer_table_size[0x5];
+};
+
+struct mlx5_ifc_create_nvmeotcp_tag_buf_table_in_bits {
+ struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+ struct mlx5_ifc_nvmeotcp_tag_buf_table_obj_bits
+ nvmeotcp_tag_buf_table_obj;
+};
+
enum {
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_128 = 0x0,
MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_KEY_SIZE_256 = 0x1,
@@ -12903,6 +12941,13 @@ enum {
enum {
MLX5_TRANSPORT_STATIC_PARAMS_ACC_TYPE_TLS = 0x1,
+ MLX5_TRANSPORT_STATIC_PARAMS_ACC_TYPE_NVMETCP = 0x2,
+ MLX5_TRANSPORT_STATIC_PARAMS_ACC_TYPE_NVMETCP_WITH_TLS = 0x3,
+};
+
+enum {
+ MLX5_TRANSPORT_STATIC_PARAMS_TI_INITIATOR = 0x0,
+ MLX5_TRANSPORT_STATIC_PARAMS_TI_TARGET = 0x1,
};
struct mlx5_ifc_transport_static_params_bits {
@@ -12925,7 +12970,20 @@ struct mlx5_ifc_transport_static_params_bits {
u8 reserved_at_100[0x8];
u8 dek_index[0x18];
- u8 reserved_at_120[0xe0];
+ u8 reserved_at_120[0x14];
+
+ u8 cccid_ttag[0x1];
+ u8 ti[0x1];
+ u8 zero_copy_en[0x1];
+ u8 ddgst_offload_en[0x1];
+ u8 hdgst_offload_en[0x1];
+ u8 ddgst_en[0x1];
+ u8 hddgst_en[0x1];
+ u8 pda[0x5];
+
+ u8 nvme_resync_tcp_sn[0x20];
+
+ u8 reserved_at_160[0xa0];
};
struct mlx5_ifc_tls_progress_params_bits {
@@ -13283,4 +13341,15 @@ struct mlx5_ifc_mrtcq_reg_bits {
u8 reserved_at_80[0x180];
};
+struct mlx5_ifc_nvmeotcp_progress_params_bits {
+ u8 next_pdu_tcp_sn[0x20];
+
+ u8 hw_resync_tcp_sn[0x20];
+
+ u8 pdu_tracker_state[0x2];
+ u8 offloading_state[0x2];
+ u8 reserved_at_44[0xc];
+ u8 cccid_ttag[0x10];
+};
+
#endif /* MLX5_IFC_H */
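(Not part of the patch: a sketch of enabling the new TIR bits at TIR
creation time; `in` is the create_tir_in command buffer and
tag_buf_table_id is assumed to come from a previously created
NVMEOTCP_TAG_BUFFER_TABLE general object.)

    /* Sketch only: enable the offload bits in the TIR context. */
    void *tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);

    MLX5_SET(tirc, tirc, nvmeotcp_zero_copy_en, 1);
    MLX5_SET(tirc, tirc, nvmeotcp_crc_en, 1);
    MLX5_SET(tirc, tirc, nvmeotcp_tag_buffer_table_id, tag_buf_table_id);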
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index fc7eeff99a8a..10267ddf1bfe 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -228,6 +228,7 @@ struct mlx5_wqe_ctrl_seg {
#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+#define MLX5_WQE_CTRL_TIR_TIS_INDEX_SHIFT 8
enum {
MLX5_ETH_WQE_L3_INNER_CSUM = 1 << 4,
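(Not part of the patch: the new shift would be used when pointing a WQE at
a TIR via the ctrl segment, e.g. as below; cseg and tir_num are assumed to
exist in the caller.)

    /* Sketch only: place a TIR number in the WQE ctrl segment. */
    cseg->tis_tir_num = cpu_to_be32(tir_num <<
                                    MLX5_WQE_CTRL_TIR_TIS_INDEX_SHIFT);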
--
2.34.1