* [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA
@ 2021-11-22 10:25 Varun Prakash
2021-11-22 10:35 ` Sagi Grimberg
0 siblings, 1 reply; 4+ messages in thread
From: Varun Prakash @ 2021-11-22 10:25 UTC (permalink / raw)
To: sagi, hch, kbusch; +Cc: linux-nvme, varun
As per NVMe/TCP specification (revision 1.0a, section 3.6.2.3)
Maximum Host to Controller Data length (MAXH2CDATA): Specifies the
maximum number of PDU-Data bytes per H2CData PDU in bytes. This value
is a multiple of dwords and should be no less than 4,096.
Current code sets H2CData PDU data_length to r2t_length,
it does not check MAXH2CDATA value. Fix this by setting H2CData PDU
data_length to min(req->h2cdata_left, queue->maxh2cdata).
Also validate MAXH2CDATA value returned by target in ICResp PDU,
if it is not a multiple of dword or if it is less than 4096 return
-EINVAL from nvme_tcp_init_connection().
Signed-off-by: Varun Prakash <varun@chelsio.com>
---
v2:
removed nvme_tcp_update_h2c_data_pdu()
used sock_no_sendpage() instead of kernel_sendmsg()
drivers/nvme/host/tcp.c | 41 +++++++++++++++++++++++++++++++++++------
include/linux/nvme-tcp.h | 1 +
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 92e07d2..82f8926 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -97,6 +97,7 @@ struct nvme_tcp_queue {
struct nvme_tcp_request *request;
int queue_size;
+ u32 maxh2cdata;
size_t cmnd_capsule_len;
struct nvme_tcp_ctrl *ctrl;
unsigned long flags;
@@ -582,7 +583,7 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
u8 hdgst = nvme_tcp_hdgst_len(queue);
u8 ddgst = nvme_tcp_ddgst_len(queue);
- req->pdu_len = req->h2cdata_left;
+ req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
req->pdu_sent = 0;
memset(data, 0, sizeof(*data));
@@ -933,6 +934,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
int req_data_len = req->data_len;
+ u32 h2cdata_left = req->h2cdata_left;
while (true) {
struct page *page = nvme_tcp_req_cur_page(req);
@@ -977,7 +979,13 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
req->state = NVME_TCP_SEND_DDGST;
req->offset = 0;
} else {
- nvme_tcp_done_send_req(queue);
+ if (h2cdata_left) {
+ nvme_tcp_setup_h2c_data_pdu(req);
+ req->state = NVME_TCP_SEND_H2C_PDU;
+ req->offset = 0;
+ } else {
+ nvme_tcp_done_send_req(queue);
+ }
}
return 1;
}
@@ -1028,16 +1036,21 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_data_pdu *pdu = req->pdu;
+ struct page *page = virt_to_page(pdu);
+ size_t offset = offset_in_page(pdu) + req->offset;
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) - req->offset + hdgst;
+ int flags = MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST;
int ret;
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
- ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
- offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
+ if (req->h2cdata_left)
+ ret = sock_no_sendpage(queue->sock, page, offset, len, flags);
+ else
+ ret = kernel_sendpage(queue->sock, page, offset, len, flags);
+
if (unlikely(ret <= 0))
return ret;
@@ -1057,6 +1070,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
{
struct nvme_tcp_queue *queue = req->queue;
size_t offset = req->offset;
+ u32 h2cdata_left = req->h2cdata_left;
int ret;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
struct kvec iov = {
@@ -1074,7 +1088,13 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
return ret;
if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
- nvme_tcp_done_send_req(queue);
+ if (h2cdata_left) {
+ nvme_tcp_setup_h2c_data_pdu(req);
+ req->state = NVME_TCP_SEND_H2C_PDU;
+ req->offset = 0;
+ } else {
+ nvme_tcp_done_send_req(queue);
+ }
return 1;
}
@@ -1260,6 +1280,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
struct msghdr msg = {};
struct kvec iov;
bool ctrl_hdgst, ctrl_ddgst;
+ u32 maxh2cdata;
int ret;
icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
@@ -1343,6 +1364,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
goto free_icresp;
}
+ maxh2cdata = le32_to_cpu(icresp->maxdata);
+ if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
+ pr_err("queue %d: invalid maxh2cdata returned %u\n",
+ nvme_tcp_queue_id(queue), maxh2cdata);
+ goto free_icresp;
+ }
+ queue->maxh2cdata = maxh2cdata;
+
ret = 0;
free_icresp:
kfree(icresp);
diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
index 959e0bd..7547015 100644
--- a/include/linux/nvme-tcp.h
+++ b/include/linux/nvme-tcp.h
@@ -12,6 +12,7 @@
#define NVME_TCP_DISC_PORT 8009
#define NVME_TCP_ADMIN_CCSZ SZ_8K
#define NVME_TCP_DIGEST_LENGTH 4
+#define NVME_TCP_MIN_MAXH2CDATA 4096
enum nvme_tcp_pfv {
NVME_TCP_PFV_1_0 = 0x0,
--
2.0.2
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA
2021-11-22 10:25 [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA Varun Prakash
@ 2021-11-22 10:35 ` Sagi Grimberg
2021-11-23 8:33 ` Varun Prakash
0 siblings, 1 reply; 4+ messages in thread
From: Sagi Grimberg @ 2021-11-22 10:35 UTC (permalink / raw)
To: Varun Prakash, hch, kbusch; +Cc: linux-nvme
On 11/22/21 12:25 PM, Varun Prakash wrote:
> As per NVMe/TCP specification (revision 1.0a, section 3.6.2.3)
> Maximum Host to Controller Data length (MAXH2CDATA): Specifies the
> maximum number of PDU-Data bytes per H2CData PDU in bytes. This value
> is a multiple of dwords and should be no less than 4,096.
>
> Current code sets H2CData PDU data_length to r2t_length,
> it does not check MAXH2CDATA value. Fix this by setting H2CData PDU
> data_length to min(req->h2cdata_left, queue->maxh2cdata).
>
> Also validate MAXH2CDATA value returned by target in ICResp PDU,
> if it is not a multiple of dword or if it is less than 4096 return
> -EINVAL from nvme_tcp_init_connection().
>
> Signed-off-by: Varun Prakash <varun@chelsio.com>
> ---
> v2:
> removed nvme_tcp_update_h2c_data_pdu()
> used sock_no_sendpage() instead of kernel_sendmsg()
>
> drivers/nvme/host/tcp.c | 41 +++++++++++++++++++++++++++++++++++------
> include/linux/nvme-tcp.h | 1 +
> 2 files changed, 36 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index 92e07d2..82f8926 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -97,6 +97,7 @@ struct nvme_tcp_queue {
> struct nvme_tcp_request *request;
>
> int queue_size;
> + u32 maxh2cdata;
> size_t cmnd_capsule_len;
> struct nvme_tcp_ctrl *ctrl;
> unsigned long flags;
> @@ -582,7 +583,7 @@ static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
> u8 hdgst = nvme_tcp_hdgst_len(queue);
> u8 ddgst = nvme_tcp_ddgst_len(queue);
>
> - req->pdu_len = req->h2cdata_left;
> + req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
> req->pdu_sent = 0;
>
> memset(data, 0, sizeof(*data));
> @@ -933,6 +934,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
> {
> struct nvme_tcp_queue *queue = req->queue;
> int req_data_len = req->data_len;
> + u32 h2cdata_left = req->h2cdata_left;
No need for the local variable, the reference only happens when the
pdu data transfer is completed.
>
> while (true) {
> struct page *page = nvme_tcp_req_cur_page(req);
> @@ -977,7 +979,13 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
> req->state = NVME_TCP_SEND_DDGST;
> req->offset = 0;
> } else {
> - nvme_tcp_done_send_req(queue);
> + if (h2cdata_left) {
> + nvme_tcp_setup_h2c_data_pdu(req);
> + req->state = NVME_TCP_SEND_H2C_PDU;
> + req->offset = 0;
req->state and req->offset setting can move to
nvme_tcp_setup_h2c_data_pdu() as commented in patch 1.
> + } else {
> + nvme_tcp_done_send_req(queue);
> + }
> }
> return 1;
> }
> @@ -1028,16 +1036,21 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
> {
> struct nvme_tcp_queue *queue = req->queue;
> struct nvme_tcp_data_pdu *pdu = req->pdu;
> + struct page *page = virt_to_page(pdu);
> + size_t offset = offset_in_page(pdu) + req->offset;
> u8 hdgst = nvme_tcp_hdgst_len(queue);
> int len = sizeof(*pdu) - req->offset + hdgst;
> + int flags = MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST;
> int ret;
Not sure the local variables are needed, but OK, I'm fine with that.
>
> if (queue->hdr_digest && !req->offset)
> nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
>
> - ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
> - offset_in_page(pdu) + req->offset, len,
> - MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
> + if (req->h2cdata_left)
> + ret = sock_no_sendpage(queue->sock, page, offset, len, flags);
> + else
> + ret = kernel_sendpage(queue->sock, page, offset, len, flags);
> +
> if (unlikely(ret <= 0))
> return ret;
>
> @@ -1057,6 +1070,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
> {
> struct nvme_tcp_queue *queue = req->queue;
> size_t offset = req->offset;
> + u32 h2cdata_left = req->h2cdata_left;
No need for local variable.
> int ret;
> struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
> struct kvec iov = {
> @@ -1074,7 +1088,13 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
> return ret;
>
> if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
> - nvme_tcp_done_send_req(queue);
> + if (h2cdata_left) {
> + nvme_tcp_setup_h2c_data_pdu(req);
> + req->state = NVME_TCP_SEND_H2C_PDU;
> + req->offset = 0;
> + } else {
> + nvme_tcp_done_send_req(queue);
> + }
> return 1;
> }
>
> @@ -1260,6 +1280,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
> struct msghdr msg = {};
> struct kvec iov;
> bool ctrl_hdgst, ctrl_ddgst;
> + u32 maxh2cdata;
> int ret;
>
> icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
> @@ -1343,6 +1364,14 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
> goto free_icresp;
> }
>
> + maxh2cdata = le32_to_cpu(icresp->maxdata);
> + if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
> + pr_err("queue %d: invalid maxh2cdata returned %u\n",
> + nvme_tcp_queue_id(queue), maxh2cdata);
> + goto free_icresp;
> + }
> + queue->maxh2cdata = maxh2cdata;
> +
> ret = 0;
> free_icresp:
> kfree(icresp);
> diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h
> index 959e0bd..7547015 100644
> --- a/include/linux/nvme-tcp.h
> +++ b/include/linux/nvme-tcp.h
> @@ -12,6 +12,7 @@
> #define NVME_TCP_DISC_PORT 8009
> #define NVME_TCP_ADMIN_CCSZ SZ_8K
> #define NVME_TCP_DIGEST_LENGTH 4
> +#define NVME_TCP_MIN_MAXH2CDATA 4096
>
> enum nvme_tcp_pfv {
> NVME_TCP_PFV_1_0 = 0x0,
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA
2021-11-22 10:35 ` Sagi Grimberg
@ 2021-11-23 8:33 ` Varun Prakash
2021-11-23 9:09 ` Sagi Grimberg
0 siblings, 1 reply; 4+ messages in thread
From: Varun Prakash @ 2021-11-23 8:33 UTC (permalink / raw)
To: Sagi Grimberg; +Cc: hch, kbusch, linux-nvme
On Mon, Nov 22, 2021 at 12:35:11PM +0200, Sagi Grimberg wrote:
> >@@ -933,6 +934,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
> > {
> > struct nvme_tcp_queue *queue = req->queue;
> > int req_data_len = req->data_len;
> >+ u32 h2cdata_left = req->h2cdata_left;
>
> No need for the local variable, the reference only happens when the
> pdu data transfer is completed.
As this function also executes for inline data it is possible that completion
gets processed and request gets allocated for new cmd before
if (req->h2cdata_left) check, nvme_tcp_setup_cmd_pdu() will set
req->h2cdata_left to 0 so it will work but it does not look correct
to me. IMO we should use local variable here.
>
> > while (true) {
> > struct page *page = nvme_tcp_req_cur_page(req);
> >@@ -977,7 +979,13 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
> > req->state = NVME_TCP_SEND_DDGST;
> > req->offset = 0;
> > } else {
> >- nvme_tcp_done_send_req(queue);
> >+ if (h2cdata_left) {
> >+ nvme_tcp_setup_h2c_data_pdu(req);
> >+ req->state = NVME_TCP_SEND_H2C_PDU;
> >+ req->offset = 0;
>
> >@@ -1028,16 +1036,21 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
> > {
> > struct nvme_tcp_queue *queue = req->queue;
> > struct nvme_tcp_data_pdu *pdu = req->pdu;
> >+ struct page *page = virt_to_page(pdu);
> >+ size_t offset = offset_in_page(pdu) + req->offset;
> > u8 hdgst = nvme_tcp_hdgst_len(queue);
> > int len = sizeof(*pdu) - req->offset + hdgst;
> >+ int flags = MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST;
> > int ret;
>
> Not sure the local variables are needed, but OK, I'm fine with that.
I will remove these local variables in next patch.
> >@@ -1057,6 +1070,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
> > {
> > struct nvme_tcp_queue *queue = req->queue;
> > size_t offset = req->offset;
> >+ u32 h2cdata_left = req->h2cdata_left;
>
> No need for local variable.
Same explanation as above for local variable.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA
2021-11-23 8:33 ` Varun Prakash
@ 2021-11-23 9:09 ` Sagi Grimberg
0 siblings, 0 replies; 4+ messages in thread
From: Sagi Grimberg @ 2021-11-23 9:09 UTC (permalink / raw)
To: Varun Prakash; +Cc: hch, kbusch, linux-nvme
>>> @@ -933,6 +934,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
>>> {
>>> struct nvme_tcp_queue *queue = req->queue;
>>> int req_data_len = req->data_len;
>>> + u32 h2cdata_left = req->h2cdata_left;
>>
>> No need for the local variable, the reference only happens when the
>> pdu data transfer is completed.
>
> As this function also executes for inline data it is possible that completion
> gets processed and request gets allocated for new cmd before
> if (req->h2cdata_left) check, nvme_tcp_setup_cmd_pdu() will set
> req->h2cdata_left to 0 so it will work but it does not look correct
> to me. IMO we should use local variable here.
I see. that's fine then.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2021-11-23 9:09 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-11-22 10:25 [PATCH v2 2/2] nvme-tcp: send H2CData PDUs based on MAXH2CDATA Varun Prakash
2021-11-22 10:35 ` Sagi Grimberg
2021-11-23 8:33 ` Varun Prakash
2021-11-23 9:09 ` Sagi Grimberg
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox