From: Boris Pismenny <borisp@mellanox.com>
To: dsahern@gmail.com, kuba@kernel.org, davem@davemloft.net,
saeedm@nvidia.com, hch@lst.de, sagi@grimberg.me, axboe@fb.com,
kbusch@kernel.org, viro@zeniv.linux.org.uk, edumazet@google.com,
smalin@marvell.com
Cc: Yoray Zack <yorayz@mellanox.com>,
yorayz@nvidia.com, boris.pismenny@gmail.com,
Ben Ben-Ishay <benishay@mellanox.com>,
benishay@nvidia.com, linux-nvme@lists.infradead.org,
netdev@vger.kernel.org, Or Gerlitz <ogerlitz@mellanox.com>,
ogerlitz@nvidia.com
Subject: [PATCH v3 net-next 08/21] nvme-tcp : Recalculate crc in the end of the capsule
Date: Mon, 1 Feb 2021 12:04:56 +0200 [thread overview]
Message-ID: <20210201100509.27351-9-borisp@mellanox.com> (raw)
In-Reply-To: <20210201100509.27351-1-borisp@mellanox.com>
From: Ben Ben-ishay <benishay@nvidia.com>
crc offload of the nvme capsule. Check if all the skb bits
are on, and if not recalculate the crc in SW and check it.
This patch reworks the receive-side crc calculation to always
run at the end, so as to keep a single flow for both offload
and non-offload. This change simplifies the code, but it may degrade
performance for non-offload crc calculation.
Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Yoray Zack <yorayz@mellanox.com>
---
drivers/nvme/host/tcp.c | 118 ++++++++++++++++++++++++++++++++--------
1 file changed, 95 insertions(+), 23 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5cb46deb56e0..eb47cf6982d7 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -69,6 +69,7 @@ enum nvme_tcp_queue_flags {
NVME_TCP_Q_LIVE = 1,
NVME_TCP_Q_POLLING = 2,
NVME_TCP_Q_OFF_DDP = 3,
+ NVME_TCP_Q_OFF_DDGST_RX = 4,
};
enum nvme_tcp_recv_state {
@@ -96,6 +97,7 @@ struct nvme_tcp_queue {
size_t data_remaining;
size_t ddgst_remaining;
unsigned int nr_cqe;
+ bool ddgst_valid;
/* send state */
struct nvme_tcp_request *request;
@@ -234,7 +236,56 @@ static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
return nvme_tcp_pdu_data_left(req) <= len;
}
-#ifdef CONFIG_TCP_DDP
+static inline bool nvme_tcp_ddp_ddgst_ok(struct nvme_tcp_queue *queue)
+{
+ return queue->ddgst_valid;
+}
+
+static inline void nvme_tcp_ddp_ddgst_update(struct nvme_tcp_queue *queue,
+ struct sk_buff *skb)
+{
+ if (queue->ddgst_valid)
+#ifdef CONFIG_TCP_DDP_CRC
+ queue->ddgst_valid = skb->ddp_crc;
+#else
+ queue->ddgst_valid = false;
+#endif
+}
+
+static int nvme_tcp_req_map_sg(struct nvme_tcp_request *req, struct request *rq)
+{
+ int ret;
+
+ req->ddp.sg_table.sgl = req->ddp.first_sgl;
+ ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
+ req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
+ if (ret)
+ return -ENOMEM;
+ req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
+ return 0;
+}
+
+static void nvme_tcp_ddp_ddgst_recalc(struct ahash_request *hash,
+ struct request *rq)
+{
+ struct nvme_tcp_request *req;
+
+ if (!rq)
+ return;
+
+ req = blk_mq_rq_to_pdu(rq);
+
+ if (!req->offloaded && nvme_tcp_req_map_sg(req, rq))
+ return;
+
+ crypto_ahash_init(hash);
+ req->ddp.sg_table.sgl = req->ddp.first_sgl;
+ ahash_request_set_crypt(hash, req->ddp.sg_table.sgl, NULL,
+ le32_to_cpu(req->data_len));
+ crypto_ahash_update(hash);
+}
+
+#if defined(CONFIG_TCP_DDP) || defined(CONFIG_TCP_DDP_CRC)
static bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
static void nvme_tcp_ddp_teardown_done(void *ddp_ctx);
@@ -290,12 +341,9 @@ int nvme_tcp_setup_ddp(struct nvme_tcp_queue *queue,
}
req->ddp.command_id = command_id;
- req->ddp.sg_table.sgl = req->ddp.first_sgl;
- ret = sg_alloc_table_chained(&req->ddp.sg_table, blk_rq_nr_phys_segments(rq),
- req->ddp.sg_table.sgl, SG_CHUNK_SIZE);
+ ret = nvme_tcp_req_map_sg(req, rq);
if (ret)
return -ENOMEM;
- req->ddp.nents = blk_rq_map_sg(rq->q, rq, req->ddp.sg_table.sgl);
ret = netdev->tcp_ddp_ops->tcp_ddp_setup(netdev,
queue->sock->sk,
@@ -317,7 +365,7 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
return -ENODEV;
}
- if (!(netdev->features & NETIF_F_HW_TCP_DDP)) {
+ if (!(netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX))) {
dev_put(netdev);
return -EOPNOTSUPP;
}
@@ -345,6 +393,9 @@ int nvme_tcp_offload_socket(struct nvme_tcp_queue *queue)
if (netdev->features & NETIF_F_HW_TCP_DDP)
set_bit(NVME_TCP_Q_OFF_DDP, &queue->flags);
+ if (netdev->features & NETIF_F_HW_TCP_DDP_CRC_RX)
+ set_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
+
return ret;
}
@@ -376,7 +427,7 @@ int nvme_tcp_offload_limits(struct nvme_tcp_queue *queue)
return -ENODEV;
}
- if (netdev->features & NETIF_F_HW_TCP_DDP &&
+ if ((netdev->features & (NETIF_F_HW_TCP_DDP | NETIF_F_HW_TCP_DDP_CRC_RX)) &&
netdev->tcp_ddp_ops &&
netdev->tcp_ddp_ops->tcp_ddp_limits)
ret = netdev->tcp_ddp_ops->tcp_ddp_limits(netdev, &limits);
@@ -739,6 +790,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
queue->pdu_offset = 0;
queue->data_remaining = -1;
queue->ddgst_remaining = 0;
+ queue->ddgst_valid = true;
}
static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
@@ -919,7 +971,8 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
u64 pdu_seq = TCP_SKB_CB(skb)->seq + *offset - queue->pdu_offset;
- if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
+ if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
+ test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
nvme_tcp_resync_response(queue, pdu_seq);
ret = skb_copy_bits(skb, *offset,
@@ -988,6 +1041,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
struct nvme_tcp_request *req;
struct request *rq;
+ if (queue->data_digest && test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+ nvme_tcp_ddp_ddgst_update(queue, skb);
rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
if (!rq) {
dev_err(queue->ctrl->ctrl.device,
@@ -1025,15 +1080,17 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
recv_len = min_t(size_t, recv_len,
iov_iter_count(&req->iter));
- if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) {
- if (queue->data_digest)
+ if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags)) {
+ if (queue->data_digest &&
+ !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
ret = skb_ddp_copy_and_hash_datagram_iter(skb, *offset,
&req->iter, recv_len, queue->rcv_hash);
else
ret = skb_ddp_copy_datagram_iter(skb, *offset,
&req->iter, recv_len);
} else {
- if (queue->data_digest)
+ if (queue->data_digest &&
+ !test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
ret = skb_copy_and_hash_datagram_iter(skb, *offset,
&req->iter, recv_len, queue->rcv_hash);
else
@@ -1055,7 +1112,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
if (!queue->data_remaining) {
if (queue->data_digest) {
- nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
@@ -1076,8 +1132,12 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
char *ddgst = (char *)&queue->recv_ddgst;
size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
+ bool offload_fail, offload_en;
+ struct request *rq = NULL;
int ret;
+ if (test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
+ nvme_tcp_ddp_ddgst_update(queue, skb);
ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
if (unlikely(ret))
return ret;
@@ -1088,17 +1148,29 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
if (queue->ddgst_remaining)
return 0;
- if (queue->recv_ddgst != queue->exp_ddgst) {
- dev_err(queue->ctrl->ctrl.device,
- "data digest error: recv %#x expected %#x\n",
- le32_to_cpu(queue->recv_ddgst),
- le32_to_cpu(queue->exp_ddgst));
- return -EIO;
+ offload_fail = !nvme_tcp_ddp_ddgst_ok(queue);
+ offload_en = test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags);
+ if (!offload_en || offload_fail) {
+ if (offload_en && offload_fail) { // software-fallback
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);
+ nvme_tcp_ddp_ddgst_recalc(queue->rcv_hash, rq);
+ }
+
+ nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
+ if (queue->recv_ddgst != queue->exp_ddgst) {
+ dev_err(queue->ctrl->ctrl.device,
+ "data digest error: recv %#x expected %#x\n",
+ le32_to_cpu(queue->recv_ddgst),
+ le32_to_cpu(queue->exp_ddgst));
+ return -EIO;
+ }
}
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- struct request *rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
- pdu->command_id);
+ if (!rq)
+ rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);
nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
queue->nr_cqe++;
@@ -1841,8 +1913,10 @@ static void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
nvme_tcp_restore_sock_calls(queue);
cancel_work_sync(&queue->io_work);
- if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags))
+ if (test_bit(NVME_TCP_Q_OFF_DDP, &queue->flags) ||
+ test_bit(NVME_TCP_Q_OFF_DDGST_RX, &queue->flags))
nvme_tcp_unoffload_socket(queue);
+
}
static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
@@ -1970,8 +2044,6 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
{
int ret;
- to_tcp_ctrl(ctrl)->offloading_netdev = NULL;
-
ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
if (ret)
return ret;
--
2.24.1
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
next prev parent reply other threads:[~2021-02-01 10:17 UTC|newest]
Thread overview: 44+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-02-01 10:04 [PATCH v3 net-next 00/21] nvme-tcp receive offloads Boris Pismenny
2021-02-01 10:04 ` [PATCH v3 net-next 01/21] iov_iter: Introduce new procedures for copy to iter/pages Boris Pismenny
2021-02-01 17:35 ` Christoph Hellwig
2021-02-02 18:00 ` Or Gerlitz
2021-02-03 16:56 ` Christoph Hellwig
2021-02-03 19:34 ` Ira Weiny
2021-02-07 14:13 ` Boris Pismenny
2021-02-07 14:24 ` Boris Pismenny
2021-02-01 10:04 ` [PATCH v3 net-next 02/21] net: Introduce direct data placement tcp offload Boris Pismenny
2021-02-02 10:06 ` Tom Parkin
2021-02-01 10:04 ` [PATCH v3 net-next 03/21] net: Introduce crc offload for tcp ddp ulp Boris Pismenny
2021-02-01 10:04 ` [PATCH v3 net-next 04/21] net: SKB copy(+hash) iterators for DDP offloads Boris Pismenny
2021-02-01 10:04 ` [PATCH v3 net-next 05/21] net/tls: expose get_netdev_for_sock Boris Pismenny
2021-02-01 10:04 ` [PATCH v3 net-next 06/21] nvme-tcp: Add DDP offload control path Boris Pismenny
2021-02-01 17:37 ` Christoph Hellwig
2021-02-02 18:09 ` Or Gerlitz
2021-02-03 9:17 ` Sagi Grimberg
2021-02-01 10:04 ` [PATCH v3 net-next 07/21] nvme-tcp: Add DDP data-path Boris Pismenny
2021-02-01 17:37 ` Christoph Hellwig
2021-02-02 18:14 ` Or Gerlitz
2021-02-03 8:56 ` Sagi Grimberg
2021-02-03 10:02 ` Christoph Hellwig
2021-02-03 10:21 ` Sagi Grimberg
2021-02-03 8:51 ` Sagi Grimberg
2021-02-04 19:20 ` Or Gerlitz
2021-02-01 10:04 ` Boris Pismenny [this message]
2021-02-03 9:06 ` [PATCH v3 net-next 08/21] nvme-tcp : Recalculate crc in the end of the capsule Sagi Grimberg
2021-02-04 18:36 ` Or Gerlitz
2021-02-07 16:40 ` Or Gerlitz
2021-02-01 10:04 ` [PATCH v3 net-next 09/21] nvme-tcp: Deal with netdevice DOWN events Boris Pismenny
2021-02-03 9:09 ` Sagi Grimberg
2021-02-04 18:29 ` Or Gerlitz
2021-02-01 10:04 ` [PATCH v3 net-next 10/21] net/mlx5: Header file changes for nvme-tcp offload Boris Pismenny
2021-02-01 10:04 ` [PATCH v3 net-next 11/21] net/mlx5: Add 128B CQE for NVMEoTCP offload Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 12/21] net/mlx5e: TCP flow steering for nvme-tcp Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 13/21] net/mlx5e: NVMEoTCP offload initialization Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 14/21] net/mlx5e: KLM UMR helper macros Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 15/21] net/mlx5e: NVMEoTCP use KLM UMRs Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 16/21] net/mlx5e: NVMEoTCP queue init/teardown Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 17/21] net/mlx5e: NVMEoTCP async ddp invalidation Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 18/21] net/mlx5e: NVMEoTCP ddp setup and resync Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 19/21] net/mlx5e: NVMEoTCP, data-path for DDP+CRC offload Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 20/21] net/mlx5e: NVMEoTCP statistics Boris Pismenny
2021-02-01 10:05 ` [PATCH v3 net-next 21/21] Documentation: add TCP DDP offload documentation Boris Pismenny
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210201100509.27351-9-borisp@mellanox.com \
--to=borisp@mellanox.com \
--cc=axboe@fb.com \
--cc=benishay@mellanox.com \
--cc=benishay@nvidia.com \
--cc=boris.pismenny@gmail.com \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=edumazet@google.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=netdev@vger.kernel.org \
--cc=ogerlitz@mellanox.com \
--cc=ogerlitz@nvidia.com \
--cc=saeedm@nvidia.com \
--cc=sagi@grimberg.me \
--cc=smalin@marvell.com \
--cc=viro@zeniv.linux.org.uk \
--cc=yorayz@mellanox.com \
--cc=yorayz@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox