From: Sagi Grimberg <sagi@grimberg.me>
To: Boris Pismenny <borisp@mellanox.com>,
kuba@kernel.org, davem@davemloft.net, saeedm@nvidia.com,
hch@lst.de, axboe@fb.com, kbusch@kernel.org,
viro@zeniv.linux.org.uk, edumazet@google.com
Cc: Yoray Zack <yorayz@mellanox.com>,
Ben Ben-Ishay <benishay@mellanox.com>,
boris.pismenny@gmail.com, linux-nvme@lists.infradead.org,
netdev@vger.kernel.org, Or Gerlitz <ogerlitz@mellanox.com>
Subject: Re: [PATCH net-next RFC v1 07/10] nvme-tcp : Recalculate crc in the end of the capsule
Date: Thu, 8 Oct 2020 15:44:07 -0700 [thread overview]
Message-ID: <a17cf1ca-4183-8f6c-8470-9d45febb755b@grimberg.me> (raw)
In-Reply-To: <20200930162010.21610-8-borisp@mellanox.com>
> crc offload of the nvme capsule. Check if all the skb bits
> are on, and if not recalculate the crc in SW and check it.
Can you clarify in the patch description that this is only
for pdu data digest and not header digest?
>
> This patch reworks the receive-side crc calculation to always
> run at the end, so as to keep a single flow for both offload
> and non-offload. This change simplifies the code, but it may degrade
> performance for non-offload crc calculation.
??
From my scan it doeesn't look like you do that.. Am I missing something?
Can you explain?
>
> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> Signed-off-by: Ben Ben-Ishay <benishay@mellanox.com>
> Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
> Signed-off-by: Yoray Zack <yorayz@mellanox.com>
> ---
> drivers/nvme/host/tcp.c | 66 ++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 58 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 7bd97f856677..9a620d1dacb4 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -94,6 +94,7 @@ struct nvme_tcp_queue {
> size_t data_remaining;
> size_t ddgst_remaining;
> unsigned int nr_cqe;
> + bool crc_valid;
>
> /* send state */
> struct nvme_tcp_request *request;
> @@ -233,6 +234,41 @@ static inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
> return nvme_tcp_pdu_data_left(req) <= len;
> }
>
> +static inline bool nvme_tcp_device_ddgst_ok(struct nvme_tcp_queue *queue)
Maybe call it nvme_tcp_ddp_ddgst_ok?
> +{
> + return queue->crc_valid;
> +}
> +
> +static inline void nvme_tcp_device_ddgst_update(struct nvme_tcp_queue *queue,
> + struct sk_buff *skb)
Maybe call it nvme_tcp_ddp_ddgst_update?
> +{
> + if (queue->crc_valid)
> +#ifdef CONFIG_TCP_DDP_CRC
> + queue->crc_valid = skb->ddp_crc;
> +#else
> + queue->crc_valid = false;
> +#endif
> +}
> +
> +static void nvme_tcp_crc_recalculate(struct nvme_tcp_queue *queue,
> + struct nvme_tcp_data_pdu *pdu)
Maybe call it nvme_tcp_ddp_ddgst_recalc?
> +{
> + struct nvme_tcp_request *req;
> + struct request *rq;
> +
> + rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
> + if (!rq)
> + return;
> + req = blk_mq_rq_to_pdu(rq);
> + crypto_ahash_init(queue->rcv_hash);
> + req->ddp.sg_table.sgl = req->ddp.first_sgl;
> + /* req->ddp.sg_table is allocated and filled in nvme_tcp_setup_ddp */
> + ahash_request_set_crypt(queue->rcv_hash, req->ddp.sg_table.sgl, NULL,
> + le32_to_cpu(pdu->data_length));
> + crypto_ahash_update(queue->rcv_hash);
> +}
> +
> +
> #ifdef CONFIG_TCP_DDP
>
> bool nvme_tcp_resync_request(struct sock *sk, u32 seq, u32 flags);
> @@ -706,6 +742,7 @@ static void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
> queue->pdu_offset = 0;
> queue->data_remaining = -1;
> queue->ddgst_remaining = 0;
> + queue->crc_valid = true;
> }
>
> static void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
> @@ -955,6 +992,8 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
> struct nvme_tcp_request *req;
> struct request *rq;
>
> + if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
> + nvme_tcp_device_ddgst_update(queue, skb);
Is the queue->data_digest condition missing here?
> rq = blk_mq_tag_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
> if (!rq) {
> dev_err(queue->ctrl->ctrl.device,
> @@ -992,7 +1031,7 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
> recv_len = min_t(size_t, recv_len,
> iov_iter_count(&req->iter));
>
> - if (queue->data_digest)
> + if (queue->data_digest && !test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
> ret = skb_copy_and_hash_datagram_iter(skb, *offset,
> &req->iter, recv_len, queue->rcv_hash);
This is the skb copy and hash, not clear why you say that you move this
to the end...
> else
> @@ -1012,7 +1051,6 @@ static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
>
> if (!queue->data_remaining) {
> if (queue->data_digest) {
> - nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
If I instead do:
if (!test_bit(NVME_TCP_Q_OFFLOADS,
&queue->flags))
nvme_tcp_ddgst_final(queue->rcv_hash,
&queue->exp_ddgst);
Does that help the mess in nvme_tcp_recv_ddgst?
> queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
> } else {
> if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
> @@ -1033,8 +1071,11 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
> char *ddgst = (char *)&queue->recv_ddgst;
> size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
> off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
> + bool ddgst_offload_fail;
> int ret;
>
> + if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags))
> + nvme_tcp_device_ddgst_update(queue, skb);
> ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
> if (unlikely(ret))
> return ret;
> @@ -1045,12 +1086,21 @@ static int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
> if (queue->ddgst_remaining)
> return 0;
>
> - if (queue->recv_ddgst != queue->exp_ddgst) {
> - dev_err(queue->ctrl->ctrl.device,
> - "data digest error: recv %#x expected %#x\n",
> - le32_to_cpu(queue->recv_ddgst),
> - le32_to_cpu(queue->exp_ddgst));
> - return -EIO;
> + ddgst_offload_fail = !nvme_tcp_device_ddgst_ok(queue);
> + if (!test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) ||
> + ddgst_offload_fail) {
> + if (test_bit(NVME_TCP_Q_OFFLOADS, &queue->flags) &&
> + ddgst_offload_fail)
> + nvme_tcp_crc_recalculate(queue, pdu);
> +
> + nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
> + if (queue->recv_ddgst != queue->exp_ddgst) {
> + dev_err(queue->ctrl->ctrl.device,
> + "data digest error: recv %#x expected %#x\n",
> + le32_to_cpu(queue->recv_ddgst),
> + le32_to_cpu(queue->exp_ddgst));
> + return -EIO;
This gets convoluted here...
> + }
> }
>
> if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
>
_______________________________________________
Linux-nvme mailing list
Linux-nvme@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-nvme
next prev parent reply other threads:[~2020-10-08 22:44 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-30 16:20 [PATCH net-next RFC v1 00/10] nvme-tcp receive offloads Boris Pismenny
2020-09-30 16:20 ` [PATCH net-next RFC v1 01/10] iov_iter: Skip copy in memcpy_to_page if src==dst Boris Pismenny
2020-10-08 23:05 ` Sagi Grimberg
2020-09-30 16:20 ` [PATCH net-next RFC v1 02/10] net: Introduce direct data placement tcp offload Boris Pismenny
2020-10-08 21:47 ` Sagi Grimberg
2020-10-11 14:44 ` Boris Pismenny
2020-09-30 16:20 ` [PATCH net-next RFC v1 03/10] net: Introduce crc offload for tcp ddp ulp Boris Pismenny
2020-10-08 21:51 ` Sagi Grimberg
2020-10-11 14:58 ` Boris Pismenny
2020-09-30 16:20 ` [PATCH net-next RFC v1 04/10] net/tls: expose get_netdev_for_sock Boris Pismenny
2020-10-08 21:56 ` Sagi Grimberg
2020-09-30 16:20 ` [PATCH net-next RFC v1 05/10] nvme-tcp: Add DDP offload control path Boris Pismenny
2020-10-08 22:19 ` Sagi Grimberg
2020-10-19 18:28 ` Boris Pismenny
[not found] ` <PH0PR18MB3845430DDF572E0DD4832D06CCED0@PH0PR18MB3845.namprd18.prod.outlook.com>
2020-11-08 6:51 ` Shai Malin
2020-11-09 23:23 ` Sagi Grimberg
2020-11-11 5:12 ` FW: " Shai Malin
2020-11-11 5:43 ` Shai Malin
2020-09-30 16:20 ` [PATCH net-next RFC v1 06/10] nvme-tcp: Add DDP data-path Boris Pismenny
2020-10-08 22:29 ` Sagi Grimberg
2020-10-08 23:00 ` Sagi Grimberg
2020-11-08 13:59 ` Boris Pismenny
2020-11-08 9:44 ` Boris Pismenny
2020-11-09 23:18 ` Sagi Grimberg
2020-09-30 16:20 ` [PATCH net-next RFC v1 07/10] nvme-tcp : Recalculate crc in the end of the capsule Boris Pismenny
2020-10-08 22:44 ` Sagi Grimberg [this message]
[not found] ` <PH0PR18MB3845764B48FD24C87FA34304CCED0@PH0PR18MB3845.namprd18.prod.outlook.com>
[not found] ` <PH0PR18MB38458FD325BD77983D2623D4CCEB0@PH0PR18MB3845.namprd18.prod.outlook.com>
2020-11-08 6:59 ` Shai Malin
2020-11-08 7:28 ` Boris Pismenny
2020-11-08 14:46 ` Boris Pismenny
2020-09-30 16:20 ` [PATCH net-next RFC v1 08/10] nvme-tcp: Deal with netdevice DOWN events Boris Pismenny
2020-10-08 22:47 ` Sagi Grimberg
2020-10-11 6:54 ` Or Gerlitz
2020-09-30 16:20 ` [PATCH net-next RFC v1 09/10] net/mlx5e: Add NVMEoTCP offload Boris Pismenny
2020-09-30 16:20 ` [PATCH net-next RFC v1 10/10] net/mlx5e: NVMEoTCP, data-path for DDP offload Boris Pismenny
2020-10-09 0:08 ` [PATCH net-next RFC v1 00/10] nvme-tcp receive offloads Sagi Grimberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a17cf1ca-4183-8f6c-8470-9d45febb755b@grimberg.me \
--to=sagi@grimberg.me \
--cc=axboe@fb.com \
--cc=benishay@mellanox.com \
--cc=boris.pismenny@gmail.com \
--cc=borisp@mellanox.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=hch@lst.de \
--cc=kbusch@kernel.org \
--cc=kuba@kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=netdev@vger.kernel.org \
--cc=ogerlitz@mellanox.com \
--cc=saeedm@nvidia.com \
--cc=viro@zeniv.linux.org.uk \
--cc=yorayz@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox