From: Boris Pismenny <borisp@nvidia.com>
To: <dsahern@gmail.com>, <kuba@kernel.org>, <davem@davemloft.net>,
<saeedm@nvidia.com>, <hch@lst.de>, <sagi@grimberg.me>,
<axboe@fb.com>, <kbusch@kernel.org>, <viro@zeniv.linux.org.uk>,
<edumazet@google.com>, <smalin@marvell.com>
Cc: <boris.pismenny@gmail.com>, <linux-nvme@lists.infradead.org>,
<netdev@vger.kernel.org>, <benishay@nvidia.com>,
<ogerlitz@nvidia.com>, <yorayz@nvidia.com>,
Boris Pismenny <borisp@mellanox.com>,
Or Gerlitz <ogerlitz@mellanox.com>,
Yoray Zack <yorayz@mellanox.com>
Subject: [PATCH v5 net-next 12/36] net/mlx5e: NVMEoTCP offload initialization
Date: Thu, 22 Jul 2021 14:03:01 +0300 [thread overview]
Message-ID: <20210722110325.371-13-borisp@nvidia.com> (raw)
In-Reply-To: <20210722110325.371-1-borisp@nvidia.com>
From: Ben Ben-Ishay <benishay@nvidia.com>
This commit introduces the initialization blocks for NVMEoTCP offload:
- Use 128B CQEs when NVME-TCP offload is enabled.
- Use a dedicated icosq for NVME-TCP work. This list of SQs is unique in the
sense that it is driven directly by the NVME-TCP layer to submit and
invalidate ddp requests.
- Query nvmeotcp capabilities
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yoray Zack <yorayz@mellanox.com>
---
.../net/ethernet/mellanox/mlx5/core/Kconfig | 10 +
.../net/ethernet/mellanox/mlx5/core/Makefile | 2 +
drivers/net/ethernet/mellanox/mlx5/core/en.h | 11 +
.../ethernet/mellanox/mlx5/core/en/params.c | 11 +-
.../ethernet/mellanox/mlx5/core/en/params.h | 3 +
.../mellanox/mlx5/core/en_accel/en_accel.h | 9 +-
.../mellanox/mlx5/core/en_accel/nvmeotcp.c | 196 ++++++++++++++++++
.../mellanox/mlx5/core/en_accel/nvmeotcp.h | 117 +++++++++++
.../net/ethernet/mellanox/mlx5/core/en_main.c | 26 +++
.../net/ethernet/mellanox/mlx5/core/en_txrx.c | 17 ++
drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 +
11 files changed, 405 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index e1a5a79e27c7..e6079ff2e917 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -233,3 +233,13 @@ config MLX5_SF_MANAGER
port is managed through devlink. A subfunction supports RDMA, netdevice
and vdpa device. It is similar to a SRIOV VF but it doesn't require
SRIOV support.
+
+config MLX5_EN_NVMEOTCP
+ bool "NVMEoTCP acceleration"
+ depends on MLX5_CORE_EN
+ depends on ULP_DDP=y
+ default n
+ help
+ Build support for NVMEoTCP acceleration in the NIC.
+ Note: Support for hardware with this capability needs to be selected
+ for this option to become available.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b5072a3a2585..0ae9e5e38ec7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -101,3 +101,5 @@ mlx5_core-$(CONFIG_MLX5_SF) += sf/vhca_event.o sf/dev/dev.o sf/dev/driver.o
# SF manager
#
mlx5_core-$(CONFIG_MLX5_SF_MANAGER) += sf/cmd.o sf/hw_table.o sf/devlink.o
+
+mlx5_core-$(CONFIG_MLX5_EN_NVMEOTCP) += en_accel/fs_tcp.o en_accel/nvmeotcp.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index b1b51bbba054..1233ebcf311b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -270,6 +270,10 @@ struct mlx5e_params {
unsigned int sw_mtu;
int hard_mtu;
bool ptp_rx;
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ bool nvmeotcp;
+ bool crc_rx_offload;
+#endif
};
enum {
@@ -678,6 +682,10 @@ struct mlx5e_channel {
struct mlx5e_txqsq sq[MLX5E_MAX_NUM_TC];
struct mlx5e_icosq icosq; /* internal control operations */
struct mlx5e_txqsq __rcu * __rcu *qos_sqs;
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ struct list_head list_nvmeotcpsq; /* nvmeotcp umrs */
+ spinlock_t nvmeotcp_icosq_lock;
+#endif
bool xdp;
struct napi_struct napi;
struct device *pdev;
@@ -886,6 +894,9 @@ struct mlx5e_priv {
#endif
#ifdef CONFIG_MLX5_EN_TLS
struct mlx5e_tls *tls;
+#endif
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ struct mlx5e_nvmeotcp *nvmeotcp;
#endif
struct devlink_health_reporter *tx_reporter;
struct devlink_health_reporter *rx_reporter;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 150c8e82c738..a84508425e47 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -437,7 +437,8 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev,
void *cqc = param->cqc;
MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
- if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128)
+ if (MLX5_CAP_GEN(mdev, cqe_128_always) &&
+ (cache_line_size() >= 128 || param->force_cqe128))
MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
}
@@ -450,6 +451,12 @@ static void mlx5e_build_rx_cq_param(struct mlx5_core_dev *mdev,
void *cqc = param->cqc;
u8 log_cq_size;
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ /* nvme-tcp offload mandates 128 byte cqes */
+ param->force_cqe128 |= (params->nvmeotcp|| params->crc_rx_offload);
+#endif
+
+
switch (params->rq_wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
@@ -620,7 +627,7 @@ static u8 mlx5e_build_async_icosq_log_wq_sz(struct mlx5_core_dev *mdev)
return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
}
-static void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
+void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
u8 log_wq_size,
struct mlx5e_sq_param *param)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
index e9593f5f0661..4f232ba726ec 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h
@@ -16,6 +16,7 @@ struct mlx5e_cq_param {
struct mlx5_wq_param wq;
u16 eq_ix;
u8 cq_period_mode;
+ bool force_cqe128;
};
struct mlx5e_rq_param {
@@ -147,6 +148,8 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
u16 q_counter,
struct mlx5e_channel_param *cparam);
+void mlx5e_build_icosq_param(struct mlx5_core_dev *mdev,
+ u8 log_wq_size,struct mlx5e_sq_param *param);
u16 mlx5e_calc_sq_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
int mlx5e_validate_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index d964665eaa63..b9404366e6e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -39,6 +39,7 @@
#include "en_accel/ipsec_rxtx.h"
#include "en_accel/tls.h"
#include "en_accel/tls_rxtx.h"
+#include "en_accel/nvmeotcp.h"
#include "en.h"
#include "en/txrx.h"
@@ -195,11 +196,17 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq,
static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv)
{
- return mlx5e_ktls_init_rx(priv);
+ int tls, nvmeotcp;
+
+ tls = mlx5e_ktls_init_rx(priv);
+ nvmeotcp = mlx5e_nvmeotcp_init_rx(priv);
+
+ return tls && nvmeotcp;
}
static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv)
{
+ mlx5e_nvmeotcp_cleanup_rx(priv); /* reverse of the order used in mlx5e_accel_init_rx() */
mlx5e_ktls_cleanup_rx(priv);
}
#endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
new file mode 100644
index 000000000000..04e88042b243
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2021 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <linux/idr.h>
+#include "en_accel/nvmeotcp.h"
+#include "en_accel/fs_tcp.h"
+#include "en/txrx.h"
+
+#define MAX_NVMEOTCP_QUEUES (512)
+#define MIN_NVMEOTCP_QUEUES (1)
+
+static const struct rhashtable_params rhash_queues = { /* parameters passed to rhashtable_init() for mlx5e_nvmeotcp.queue_hash */
+ .key_len = sizeof(int), /* the key is the int 'id' member */
+ .key_offset = offsetof(struct mlx5e_nvmeotcp_queue, id),
+ .head_offset = offsetof(struct mlx5e_nvmeotcp_queue, hash), /* rhash_head linkage inside each queue */
+ .automatic_shrinking = true,
+ .min_size = 1,
+ .max_size = MAX_NVMEOTCP_QUEUES, /* bounded by MAX_NVMEOTCP_QUEUES (512) */
+};
+
+/* ulp_ddp_limits callback. Placeholder for now: @limits is left untouched
+ * and success is reported.
+ */
+static int mlx5e_nvmeotcp_offload_limits(struct net_device *netdev,
+					 struct ulp_ddp_limits *limits)
+{
+	return 0;
+}
+
+/* ulp_ddp_sk_add callback. Placeholder for now: nothing is set up for @sk
+ * and success is reported.
+ */
+static int mlx5e_nvmeotcp_queue_init(struct net_device *netdev,
+				     struct sock *sk,
+				     struct ulp_ddp_config *tconfig)
+{
+	return 0;
+}
+
+/* ulp_ddp_sk_del callback; currently an empty placeholder. */
+static void mlx5e_nvmeotcp_queue_teardown(struct net_device *netdev,
+					  struct sock *sk)
+{
+}
+
+/* ulp_ddp_setup callback. Placeholder for now: @ddp is not touched and
+ * success is reported.
+ */
+static int mlx5e_nvmeotcp_ddp_setup(struct net_device *netdev,
+				    struct sock *sk,
+				    struct ulp_ddp_io *ddp)
+{
+	return 0;
+}
+
+/* ulp_ddp_teardown callback. Placeholder for now: neither @ddp nor @ddp_ctx
+ * is used and success is reported.
+ */
+static int mlx5e_nvmeotcp_ddp_teardown(struct net_device *netdev,
+				       struct sock *sk,
+				       struct ulp_ddp_io *ddp,
+				       void *ddp_ctx)
+{
+	return 0;
+}
+
+/* ulp_ddp_resync callback; currently an empty placeholder that ignores @seq. */
+static void mlx5e_nvmeotcp_dev_resync(struct net_device *netdev,
+				      struct sock *sk, u32 seq)
+{
+}
+
+static const struct ulp_ddp_dev_ops mlx5e_nvmeotcp_ops = { /* installed as netdev->ulp_ddp_ops by mlx5e_nvmeotcp_build_netdev() */
+ .ulp_ddp_limits = mlx5e_nvmeotcp_offload_limits,
+ .ulp_ddp_sk_add = mlx5e_nvmeotcp_queue_init,
+ .ulp_ddp_sk_del = mlx5e_nvmeotcp_queue_teardown,
+ .ulp_ddp_setup = mlx5e_nvmeotcp_ddp_setup,
+ .ulp_ddp_teardown = mlx5e_nvmeotcp_ddp_teardown,
+ .ulp_ddp_resync = mlx5e_nvmeotcp_dev_resync,
+};
+
+/**
+ * set_feature_nvme_tcp - turn the NVMEoTCP offload on or off for a netdev
+ * @netdev: device whose feature is being changed
+ * @enable: true to enable the offload, false to disable it
+ *
+ * Creates or destroys the TCP flow-steering tables under priv->state_lock,
+ * records the new setting in the channel parameters and in the offload
+ * context, then reopens the channels with the updated parameters.
+ *
+ * Return: 0 on success, -EOPNOTSUPP when asked to enable without an offload
+ * context, otherwise the error from table creation or channel reopen.
+ */
+int set_feature_nvme_tcp(struct net_device *netdev, bool enable)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_params *params = &priv->channels.params;
+	int err = 0;
+
+	/* A failure of mlx5e_nvmeotcp_init() is only logged by its caller, so
+	 * priv->nvmeotcp can be NULL here. Refuse to enable in that case
+	 * instead of dereferencing it below.
+	 */
+	if (enable && !priv->nvmeotcp)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&priv->state_lock);
+	if (enable)
+		err = mlx5e_accel_fs_tcp_create(priv);
+	else
+		mlx5e_accel_fs_tcp_destroy(priv);
+	mutex_unlock(&priv->state_lock);
+	if (err)
+		return err;
+
+	params->nvmeotcp = enable;
+	if (priv->nvmeotcp)
+		priv->nvmeotcp->enable = enable;
+
+	return mlx5e_safe_reopen_channels(priv);
+}
+
+/**
+ * set_feature_nvme_tcp_crc - turn the NVMEoTCP CRC Rx offload on or off
+ * @netdev: device whose feature is being changed
+ * @enable: true to enable the offload, false to disable it
+ *
+ * Creates or destroys the TCP flow-steering tables under priv->state_lock,
+ * records the new setting in the channel parameters and in the offload
+ * context, then reopens the channels with the updated parameters.
+ *
+ * Return: 0 on success, -EOPNOTSUPP when asked to enable without an offload
+ * context, otherwise the error from table creation or channel reopen.
+ */
+int set_feature_nvme_tcp_crc(struct net_device *netdev, bool enable)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_params *params = &priv->channels.params;
+	int err = 0;
+
+	/* A failure of mlx5e_nvmeotcp_init() is only logged by its caller, so
+	 * priv->nvmeotcp can be NULL here. Refuse to enable in that case
+	 * instead of dereferencing it below.
+	 */
+	if (enable && !priv->nvmeotcp)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&priv->state_lock);
+	if (enable)
+		err = mlx5e_accel_fs_tcp_create(priv);
+	else
+		mlx5e_accel_fs_tcp_destroy(priv);
+	mutex_unlock(&priv->state_lock);
+	/* Do not let the reopen result below mask a table-creation failure. */
+	if (err)
+		return err;
+
+	params->crc_rx_offload = enable;
+	if (priv->nvmeotcp)
+		priv->nvmeotcp->crc_rx_enable = enable;
+
+	err = mlx5e_safe_reopen_channels(priv);
+	if (err)
+		netdev_err(priv->netdev,
+			   "%s failed to reopen channels, err(%d).\n",
+			   __func__, err);
+
+	return err;
+}
+
+/* Advertise the ULP DDP offload on the netdev.
+ *
+ * Does nothing unless the device reports the general nvmeotcp capability.
+ * NETIF_F_HW_ULP_DDP is set in both features and hw_features when either the
+ * zerocopy or the crc_rx NVMEoTCP capability is present; the ULP DDP ops are
+ * installed in either case.
+ */
+void mlx5e_nvmeotcp_build_netdev(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct net_device *dev = priv->netdev;
+	bool ddp_capable;
+
+	if (!MLX5_CAP_GEN(mdev, nvmeotcp))
+		return;
+
+	ddp_capable = MLX5_CAP_DEV_NVMEOTCP(mdev, zerocopy) ||
+		      MLX5_CAP_DEV_NVMEOTCP(mdev, crc_rx);
+	if (ddp_capable) {
+		dev->features |= NETIF_F_HW_ULP_DDP;
+		dev->hw_features |= NETIF_F_HW_ULP_DDP;
+	}
+
+	dev->ulp_ddp_ops = &mlx5e_nvmeotcp_ops;
+}
+
+/* Create the TCP flow-steering tables if the ULP DDP feature is currently
+ * enabled on the netdev.
+ *
+ * Return: 0 when the feature is off, otherwise the result of
+ * mlx5e_accel_fs_tcp_create().
+ */
+int mlx5e_nvmeotcp_init_rx(struct mlx5e_priv *priv)
+{
+	if (!(priv->netdev->features & NETIF_F_HW_ULP_DDP))
+		return 0;
+
+	return mlx5e_accel_fs_tcp_create(priv);
+}
+
+/* Destroy the TCP flow-steering tables, but only while the ULP DDP feature
+ * is enabled on the netdev.
+ */
+void mlx5e_nvmeotcp_cleanup_rx(struct mlx5e_priv *priv)
+{
+	if (!(priv->netdev->features & NETIF_F_HW_ULP_DDP))
+		return;
+
+	mlx5e_accel_fs_tcp_destroy(priv);
+}
+
+/**
+ * mlx5e_nvmeotcp_init - allocate and attach the NVMEoTCP offload context
+ * @priv: mlx5e netdev private data
+ *
+ * Allocates the context, initializes its queue-id allocator and queue hash
+ * table, attaches it to @priv and marks the offload enabled both in the
+ * context and in the channel parameters.
+ *
+ * Return: 0 on success, -ENOMEM if the context cannot be allocated, or the
+ * error returned by rhashtable_init(). On failure priv->nvmeotcp is left
+ * untouched.
+ */
+int mlx5e_nvmeotcp_init(struct mlx5e_priv *priv)
+{
+	struct mlx5e_nvmeotcp *nvmeotcp;
+	int ret;
+
+	nvmeotcp = kzalloc(sizeof(*nvmeotcp), GFP_KERNEL);
+	if (!nvmeotcp)
+		return -ENOMEM;
+
+	ida_init(&nvmeotcp->queue_ids);
+	ret = rhashtable_init(&nvmeotcp->queue_hash, &rhash_queues);
+	if (ret)
+		goto err_ida;
+
+	nvmeotcp->enable = true;
+	priv->nvmeotcp = nvmeotcp;
+	/* params.nvmeotcp is a bool flag, not a pointer to the context */
+	priv->channels.params.nvmeotcp = true;
+
+	return 0;
+
+err_ida:
+	ida_destroy(&nvmeotcp->queue_ids);
+	kfree(nvmeotcp);
+	return ret;
+}
+
+/* Release the NVMEoTCP offload context attached to @priv, if there is one,
+ * and clear the pointer so a repeated call is a no-op.
+ */
+void mlx5e_nvmeotcp_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5e_nvmeotcp *ctx = priv->nvmeotcp;
+
+	if (ctx) {
+		rhashtable_destroy(&ctx->queue_hash);
+		ida_destroy(&ctx->queue_ids);
+		kfree(ctx);
+		priv->nvmeotcp = NULL;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h
new file mode 100644
index 000000000000..b4a27a03578e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2021 Mellanox Technologies. */
+#ifndef __MLX5E_NVMEOTCP_H__
+#define __MLX5E_NVMEOTCP_H__
+
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+
+#include "net/ulp_ddp.h"
+#include "en.h"
+#include "en/params.h"
+
+struct nvmeotcp_queue_entry { /* element type of mlx5e_nvmeotcp_queue.ccid_table */
+ struct mlx5e_nvmeotcp_queue *queue; /* presumably the owning queue - TODO confirm */
+ u32 sgl_length;
+ struct mlx5_core_mkey klm_mkey;
+ struct scatterlist *sgl;
+ u32 ccid_gen; /* NOTE(review): looks related to mlx5e_nvmeotcp_queue.ccid_gen - confirm */
+ u64 size;
+
+ /* for the ddp invalidate done callback */
+ void *ddp_ctx;
+ struct ulp_ddp_io *ddp;
+};
+
+struct mlx5e_nvmeotcp_sq { /* an icosq that can be linked on a channel's list_nvmeotcpsq */
+ struct list_head list; /* linkage walked by mlx5e_napi_poll() via c->list_nvmeotcpsq */
+ struct mlx5e_icosq icosq; /* its cq is polled and re-armed from mlx5e_napi_poll() */
+};
+
+/**
+ * struct mlx5e_nvmeotcp_queue - MLX5 metadata for NVMEoTCP queue
+ * @ulp_ddp_ctx: Embedded generic ULP DDP context (struct ulp_ddp_ctx)
+ * @fh: Flow handle representing the 5-tuple steering for this flow
+ * @tirn: Destination TIR number created for NVMEoTCP offload
+ * @id: Flow tag ID used to identify this queue; also the hash table key
+ * @size: NVMEoTCP queue depth
+ * @sq: Send queue used for sending control messages
+ * @nvmeotcp_icosq_lock: Spin lock for icosq
+ * @ccid_table: Table holding metadata for each CC
+ * @tag_buf_table_id: Tag buffer table for CCIDs
+ * @hash: Linkage into the hash table of queues, which is keyed by @id
+ * @ref_count: Reference count for this structure
+ * @dgst: Digest-related flag (NOTE(review): exact meaning is not visible
+ *	in this header - confirm against the users of this field)
+ * @ccoff: Offset within the current CC
+ * @pda: Padding alignment
+ * @ccid_gen: Generation ID for the CCID, used to avoid conflicts in DDP
+ * @max_klms_per_wqe: Number of KLMs per DDP operation
+ * @channel_ix: Channel IX for this nvmeotcp_queue
+ * @sk: The socket used by the NVMe-TCP queue
+ * @zerocopy: if this queue is used for zerocopy offload.
+ * @crc_rx: if this queue is used for CRC Rx offload.
+ * @ccid: ID of the current CC
+ * @ccsglidx: Index within the scatter-gather list (SGL) of the current CC
+ * @ccoff_inner: Current offset within the @ccsglidx element
+ * @priv: mlx5e netdev priv
+ * @done: Completion used by the flow-steering flow
+ * @after_resync_cqe: indicate if resync occurred
+ */
+struct mlx5e_nvmeotcp_queue {
+ struct ulp_ddp_ctx ulp_ddp_ctx;
+ struct mlx5_flow_handle *fh;
+ int tirn;
+ int id;
+ u32 size;
+ struct mlx5e_nvmeotcp_sq *sq;
+ spinlock_t nvmeotcp_icosq_lock;
+ struct nvmeotcp_queue_entry *ccid_table;
+ u32 tag_buf_table_id;
+ struct rhash_head hash;
+ refcount_t ref_count;
+ bool dgst;
+ int pda;
+ u32 ccid_gen;
+ u32 max_klms_per_wqe;
+ u32 channel_ix;
+ struct sock *sk;
+ bool zerocopy;
+ bool crc_rx;
+
+ /* current ccid fields */
+ off_t ccoff;
+ int ccid;
+ int ccsglidx;
+ int ccoff_inner;
+
+ /* for ddp invalidate flow */
+ struct mlx5e_priv *priv;
+
+ /* for flow_steering flow */
+ struct completion done;
+ /* for MASK HW resync cqe */
+ bool after_resync_cqe;
+};
+
+struct mlx5e_nvmeotcp { /* per-netdev offload context, reachable as mlx5e_priv.nvmeotcp */
+ struct ida queue_ids; /* set up by ida_init() in mlx5e_nvmeotcp_init() */
+ struct rhashtable queue_hash; /* struct mlx5e_nvmeotcp_queue objects keyed by their id */
+ bool enable; /* set at init and by set_feature_nvme_tcp() */
+ bool crc_rx_enable; /* set by set_feature_nvme_tcp_crc() */
+};
+
+void mlx5e_nvmeotcp_build_netdev(struct mlx5e_priv *priv);
+int mlx5e_nvmeotcp_init(struct mlx5e_priv *priv);
+int set_feature_nvme_tcp(struct net_device *netdev, bool enable);
+int set_feature_nvme_tcp_crc(struct net_device *netdev, bool enable);
+void mlx5e_nvmeotcp_cleanup(struct mlx5e_priv *priv);
+int mlx5e_nvmeotcp_init_rx(struct mlx5e_priv *priv);
+void mlx5e_nvmeotcp_cleanup_rx(struct mlx5e_priv *priv);
+#else
+
+static inline void mlx5e_nvmeotcp_build_netdev(struct mlx5e_priv *priv) { } /* !CONFIG_MLX5_EN_NVMEOTCP: no-op stubs */
+static inline int mlx5e_nvmeotcp_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_nvmeotcp_cleanup(struct mlx5e_priv *priv) { }
+static inline int set_feature_nvme_tcp(struct net_device *netdev, bool enable) { return 0; }
+static inline int set_feature_nvme_tcp_crc(struct net_device *netdev, bool enable) { return 0; }
+static inline int mlx5e_nvmeotcp_init_rx(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_nvmeotcp_cleanup_rx(struct mlx5e_priv *priv) { }
+#endif
+#endif /* __MLX5E_NVMEOTCP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 414a73d16619..ecb12c7fdb7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -47,6 +47,7 @@
#include "en_accel/ipsec.h"
#include "en_accel/en_accel.h"
#include "en_accel/tls.h"
+#include "en_accel/nvmeotcp.h"
#include "accel/ipsec.h"
#include "accel/tls.h"
#include "lib/vxlan.h"
@@ -2007,6 +2008,10 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->aff_mask = irq_get_effective_affinity_mask(irq);
c->lag_port = mlx5e_enumerate_lag_port(priv->mdev, ix);
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ INIT_LIST_HEAD(&c->list_nvmeotcpsq);
+ spin_lock_init(&c->nvmeotcp_icosq_lock);
+#endif
netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64);
err = mlx5e_open_queues(c, params, cparam);
@@ -3820,6 +3825,9 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
err |= MLX5E_HANDLE_FEATURE(NETIF_F_NTUPLE, set_feature_arfs);
#endif
err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TLS_RX, mlx5e_ktls_set_feature_rx);
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_ULP_DDP, set_feature_nvme_tcp);
+#endif
if (err) {
netdev->features = oper_features;
@@ -3858,6 +3866,17 @@ static netdev_features_t mlx5e_fix_features(struct net_device *netdev,
features &= ~NETIF_F_RXHASH;
if (netdev->features & NETIF_F_RXHASH)
netdev_warn(netdev, "Disabling rxhash, not supported when CQE compress is active\n");
+
+ features &= ~NETIF_F_HW_ULP_DDP;
+ if (netdev->features & NETIF_F_HW_ULP_DDP)
+ netdev_warn(netdev, "Disabling tcp-ddp offload, not supported when CQE compress is active\n");
+
+ }
+
+ if (netdev->features & NETIF_F_LRO) {
+ features &= ~NETIF_F_HW_ULP_DDP;
+ if (netdev->features & NETIF_F_HW_ULP_DDP)
+ netdev_warn(netdev, "Disabling tcp-ddp offload, not supported when LRO is active\n");
}
if (mlx5e_is_uplink_rep(priv)) {
@@ -4890,6 +4909,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
mlx5e_set_netdev_dev_addr(netdev);
mlx5e_ipsec_build_netdev(priv);
mlx5e_tls_build_netdev(priv);
+ mlx5e_nvmeotcp_build_netdev(priv);
}
void mlx5e_create_q_counters(struct mlx5e_priv *priv)
@@ -4950,6 +4970,10 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err)
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
+ err = mlx5e_nvmeotcp_init(priv);
+ if (err)
+ mlx5_core_err(mdev, "NVMEoTCP initialization failed, %d\n", err);
+
dl_port = mlx5e_devlink_get_dl_port(priv);
if (dl_port->registered)
mlx5e_health_create_reporters(priv);
@@ -4963,6 +4987,8 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
if (dl_port->registered)
mlx5e_health_destroy_reporters(priv);
+
+ mlx5e_nvmeotcp_cleanup(priv);
mlx5e_tls_cleanup(priv);
mlx5e_ipsec_cleanup(priv);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 833be29170a1..3fc11b71de67 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -37,6 +37,7 @@
#include "en/xsk/rx.h"
#include "en/xsk/tx.h"
#include "en_accel/ktls_txrx.h"
+#include "en_accel/nvmeotcp.h"
static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c)
{
@@ -119,6 +120,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
struct mlx5e_txqsq __rcu **qos_sqs;
struct mlx5e_rq *xskrq = &c->xskrq;
struct mlx5e_rq *rq = &c->rq;
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ struct mlx5e_nvmeotcp_sq *nvmeotcp_sq;
+ struct list_head *cur;
+#endif
bool aff_change = false;
bool busy_xsk = false;
bool busy = false;
@@ -171,6 +176,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
* queueing more WQEs and overflowing the async ICOSQ.
*/
clear_bit(MLX5E_SQ_STATE_PENDING_XSK_TX, &c->async_icosq.state);
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ list_for_each(cur, &c->list_nvmeotcpsq) {
+ nvmeotcp_sq = list_entry(cur, struct mlx5e_nvmeotcp_sq, list);
+ mlx5e_poll_ico_cq(&nvmeotcp_sq->icosq.cq);
+ }
+#endif
/* Keep after async ICOSQ CQ poll */
if (unlikely(mlx5e_ktls_rx_pending_resync_list(c, budget)))
@@ -223,6 +234,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget)
mlx5e_cq_arm(&rq->cq);
mlx5e_cq_arm(&c->icosq.cq);
mlx5e_cq_arm(&c->async_icosq.cq);
+#ifdef CONFIG_MLX5_EN_NVMEOTCP
+ list_for_each(cur, &c->list_nvmeotcpsq) {
+ nvmeotcp_sq = list_entry(cur, struct mlx5e_nvmeotcp_sq, list);
+ mlx5e_cq_arm(&nvmeotcp_sq->icosq.cq);
+ }
+#endif
mlx5e_cq_arm(&c->xdpsq.cq);
if (xsk_open) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 016d26f809a5..a8a14c15a61f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -262,6 +262,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
return err;
}
+ if (MLX5_CAP_GEN(dev, nvmeotcp)) {
+ err = mlx5_core_get_caps(dev, MLX5_CAP_DEV_NVMEOTCP);
+ if (err)
+ return err;
+ }
+
return 0;
}
--
2.24.1
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
next prev parent reply other threads:[~2021-07-22 11:13 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-22 11:02 [PATCH v5 net-next 00/36] nvme-tcp receive and tarnsmit offloads Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 01/36] net: Introduce direct data placement tcp offload Boris Pismenny
2021-07-22 11:26 ` Eric Dumazet
2021-07-22 12:18 ` Boris Pismenny
2021-07-22 13:10 ` Eric Dumazet
2021-07-22 13:33 ` Boris Pismenny
2021-07-22 13:39 ` Eric Dumazet
2021-07-22 14:02 ` Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 02/36] iov_iter: DDP copy to iter/pages Boris Pismenny
2021-07-22 13:31 ` Christoph Hellwig
2021-07-22 20:23 ` Boris Pismenny
2021-07-23 5:03 ` Christoph Hellwig
2021-07-23 5:21 ` Al Viro
2021-08-04 14:13 ` Or Gerlitz
2021-08-10 13:29 ` Or Gerlitz
2021-07-22 20:55 ` Al Viro
2021-07-22 11:02 ` [PATCH v5 net-next 03/36] net: skb copy(+hash) iterators for DDP offloads Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 04/36] net/tls: expose get_netdev_for_sock Boris Pismenny
2021-07-23 6:06 ` Christoph Hellwig
2021-08-04 13:26 ` Or Gerlitz
2021-07-22 11:02 ` [PATCH v5 net-next 05/36] nvme-tcp: Add DDP offload control path Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 06/36] nvme-tcp: Add DDP data-path Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 07/36] nvme-tcp: RX DDGST offload Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 08/36] nvme-tcp: Deal with netdevice DOWN events Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 09/36] net/mlx5: Header file changes for nvme-tcp offload Boris Pismenny
2021-07-22 11:02 ` [PATCH v5 net-next 10/36] net/mlx5: Add 128B CQE for NVMEoTCP offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 11/36] net/mlx5e: TCP flow steering for nvme-tcp Boris Pismenny
2021-07-22 11:03 ` Boris Pismenny [this message]
2021-07-22 11:03 ` [PATCH v5 net-next 13/36] net/mlx5e: KLM UMR helper macros Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 14/36] net/mlx5e: NVMEoTCP use KLM UMRs Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 15/36] net/mlx5e: NVMEoTCP queue init/teardown Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 16/36] net/mlx5e: NVMEoTCP async ddp invalidation Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 17/36] net/mlx5e: NVMEoTCP ddp setup and resync Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 18/36] net/mlx5e: NVMEoTCP, data-path for DDP+DDGST offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 19/36] net/mlx5e: NVMEoTCP statistics Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 20/36] Documentation: add ULP DDP offload documentation Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 21/36] net: drop ULP DDP HW offload feature if no CSUM offload feature Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 22/36] net: Add ulp_ddp_pdu_info struct Boris Pismenny
2021-07-23 19:42 ` Sagi Grimberg
2021-07-22 11:03 ` [PATCH v5 net-next 23/36] net: Add to ulp_ddp support for fallback flow Boris Pismenny
2021-07-23 6:09 ` Christoph Hellwig
2021-07-22 11:03 ` [PATCH v5 net-next 24/36] net: Add MSG_DDP_CRC flag Boris Pismenny
2021-07-22 14:23 ` Eric Dumazet
2021-07-22 11:03 ` [PATCH v5 net-next 25/36] nvme-tcp: TX DDGST offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 26/36] nvme-tcp: Mapping between Tx NVMEoTCP pdu and TCP sequence Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 27/36] mlx5e: make preparation in TLS code for NVMEoTCP CRC Tx offload Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 28/36] mlx5: Add sq state test bit for nvmeotcp Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 29/36] mlx5: Add support to NETIF_F_HW_TCP_DDP_CRC_TX feature Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 30/36] net/mlx5e: NVMEoTCP DDGST TX offload TIS Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 31/36] net/mlx5e: NVMEoTCP DDGST Tx offload queue init/teardown Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 32/36] net/mlx5e: NVMEoTCP DDGST TX BSF and PSV Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 33/36] net/mlx5e: NVMEoTCP DDGST TX Data path Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 34/36] net/mlx5e: NVMEoTCP DDGST TX handle OOO packets Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 35/36] net/mlx5e: NVMEoTCP DDGST TX offload optimization Boris Pismenny
2021-07-22 11:03 ` [PATCH v5 net-next 36/36] net/mlx5e: NVMEoTCP DDGST TX statistics Boris Pismenny
2021-07-23 5:56 ` [PATCH v5 net-next 00/36] nvme-tcp receive and tarnsmit offloads Christoph Hellwig
2021-07-23 19:58 ` Sagi Grimberg
2021-08-04 13:51 ` Or Gerlitz
2021-08-06 19:46 ` Sagi Grimberg
2021-08-10 13:37 ` Or Gerlitz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210722110325.371-13-borisp@nvidia.com \
--to=borisp@nvidia.com \
--cc=axboe@fb.com \
--cc=benishay@nvidia.com \
--cc=boris.pismenny@gmail.com \
--cc=borisp@mellanox.com \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=edumazet@google.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=netdev@vger.kernel.org \
--cc=ogerlitz@mellanox.com \
--cc=ogerlitz@nvidia.com \
--cc=saeedm@nvidia.com \
--cc=sagi@grimberg.me \
--cc=smalin@marvell.com \
--cc=viro@zeniv.linux.org.uk \
--cc=yorayz@mellanox.com \
--cc=yorayz@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.