public inbox for linux-nvme@lists.infradead.org
 help / color / mirror / Atom feed
* [PATCH] nvme/tcp: use in-capsule data for fabrics commands
@ 2022-06-16 14:49 Caleb Sander
  2022-07-07  6:29 ` Sagi Grimberg
  0 siblings, 1 reply; 8+ messages in thread
From: Caleb Sander @ 2022-06-16 14:49 UTC (permalink / raw)
  To: Keith Busch, Jens Axboe, Christoph Hellwig, Sagi Grimberg,
	linux-nvme
  Cc: Caleb Sander

From the NVMe/TCP spec:
> The maximum amount of in-capsule data for Fabrics and Admin Commands
> is 8,192 bytes ... NVMe/TCP controllers must support in-capsule data
> for Fabrics and Admin Command Capsules

Currently, command data is only sent in-capsule on the admin queue
or I/O queues that indicate support for it.
Send fabrics command data in-capsule for I/O queues too to avoid
needing a separate H2CData PDU.

Signed-off-by: Caleb Sander <csander@purestorage.com>
---
 drivers/nvme/host/tcp.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index bb67538d2..f1869ab3c 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -207,12 +207,15 @@ static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
 static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
 {
 	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
 }
 
-static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
+static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue,
+		struct nvme_tcp_request *req)
 {
+	if (nvme_is_fabrics(req->req.cmd))
+		return NVME_TCP_ADMIN_CCSZ;
 	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
 }
 
 static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
 {
@@ -227,11 +230,11 @@ static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
 		return false; /* async events don't have a request */
 
 	rq = blk_mq_rq_from_pdu(req);
 
 	return rq_data_dir(rq) == WRITE && req->data_len &&
-		req->data_len <= nvme_tcp_inline_data_size(req->queue);
+		req->data_len <= nvme_tcp_inline_data_size(req->queue, req);
 }
 
 static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
 {
 	return req->iter.bvec->bv_page;
@@ -2368,11 +2371,11 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
 	if (!blk_rq_nr_phys_segments(rq))
 		nvme_tcp_set_sg_null(c);
 	else if (rq_data_dir(rq) == WRITE &&
-	    req->data_len <= nvme_tcp_inline_data_size(queue))
+	    req->data_len <= nvme_tcp_inline_data_size(queue, req))
 		nvme_tcp_set_sg_inline(queue, c, req->data_len);
 	else
 		nvme_tcp_set_sg_host_data(c, req->data_len);
 
 	return 0;
@@ -2403,11 +2406,11 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 	req->curr_bio = rq->bio;
 	if (req->curr_bio && req->data_len)
 		nvme_tcp_init_iter(req, rq_data_dir(rq));
 
 	if (rq_data_dir(rq) == WRITE &&
-	    req->data_len <= nvme_tcp_inline_data_size(queue))
+	    req->data_len <= nvme_tcp_inline_data_size(queue, req))
 		req->pdu_len = req->data_len;
 
 	pdu->hdr.type = nvme_tcp_cmd;
 	pdu->hdr.flags = 0;
 	if (queue->hdr_digest)
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] nvme/tcp: use in-capsule data for fabrics commands
  2022-06-16 14:49 [PATCH] nvme/tcp: use in-capsule data for fabrics commands Caleb Sander
@ 2022-07-07  6:29 ` Sagi Grimberg
  2022-07-07 21:12   ` [PATCH v2] nvme-tcp: use in-capsule data for I/O connect Caleb Sander
  0 siblings, 1 reply; 8+ messages in thread
From: Sagi Grimberg @ 2022-07-07  6:29 UTC (permalink / raw)
  To: Caleb Sander, Keith Busch, Jens Axboe, Christoph Hellwig,
	linux-nvme


>> The maximum amount of in-capsule data for Fabrics and Admin Commands
>> is 8,192 bytes ... NVMe/TCP controllers must support in-capsule data
>> for Fabrics and Admin Command Capsules
> 
> Currently, command data is only sent in-capsule on the admin queue
> or I/O queues that indicate support for it.
> Send fabrics command data in-capsule for I/O queues too to avoid
> needing a separate H2CData PDU.

The only fabrics command that is sent on an I/O queue is connect.
But I guess that if ioccsz=0 that would cause connect to use h2cdata...


> 
> Signed-off-by: Caleb Sander <csander@purestorage.com>
> ---
>   drivers/nvme/host/tcp.c | 11 +++++++----
>   1 file changed, 7 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index bb67538d2..f1869ab3c 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -207,12 +207,15 @@ static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
>   static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
>   {
>   	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
>   }
>   
> -static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
> +static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue,
> +		struct nvme_tcp_request *req)
>   {
> +	if (nvme_is_fabrics(req->req.cmd))
> +		return NVME_TCP_ADMIN_CCSZ;
>   	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
>   }
>   
>   static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
>   {
> @@ -227,11 +230,11 @@ static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
>   		return false; /* async events don't have a request */
>   
>   	rq = blk_mq_rq_from_pdu(req);
>   
>   	return rq_data_dir(rq) == WRITE && req->data_len &&
> -		req->data_len <= nvme_tcp_inline_data_size(req->queue);
> +		req->data_len <= nvme_tcp_inline_data_size(req->queue, req);

We can just pass req and reference the queue from the request itself.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2] nvme-tcp: use in-capsule data for I/O connect
  2022-07-07  6:29 ` Sagi Grimberg
@ 2022-07-07 21:12   ` Caleb Sander
  2022-07-08  5:00     ` Christoph Hellwig
  2022-07-12 15:35     ` Christoph Hellwig
  0 siblings, 2 replies; 8+ messages in thread
From: Caleb Sander @ 2022-07-07 21:12 UTC (permalink / raw)
  To: Sagi Grimberg
  Cc: Keith Busch, Jens Axboe, Christoph Hellwig, linux-nvme,
	Caleb Sander

From the NVMe/TCP spec:
> The maximum amount of in-capsule data for Fabrics and Admin Commands
> is 8,192 bytes ... NVMe/TCP controllers must support in-capsule data
> for Fabrics and Admin Command Capsules

Currently, command data is only sent in-capsule on the admin queue
or I/O queues that indicate support for it.
Send fabrics command data in-capsule for I/O queues too to avoid
needing a separate H2CData PDU for the connect command.

Signed-off-by: Caleb Sander <csander@purestorage.com>
---
 drivers/nvme/host/tcp.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7a9e6ffa2342..307780d2787a 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -207,13 +207,15 @@ static inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
 static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
 {
 	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
 }

-static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_queue *queue)
+static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
 {
-	return queue->cmnd_capsule_len - sizeof(struct nvme_command);
+	if (nvme_is_fabrics(req->req.cmd))
+		return NVME_TCP_ADMIN_CCSZ;
+	return req->queue->cmnd_capsule_len - sizeof(struct nvme_command);
 }

 static inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
 {
 	return req == &req->queue->ctrl->async_req;
@@ -227,11 +229,11 @@ static inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
 		return false; /* async events don't have a request */

 	rq = blk_mq_rq_from_pdu(req);

 	return rq_data_dir(rq) == WRITE && req->data_len &&
-		req->data_len <= nvme_tcp_inline_data_size(req->queue);
+		req->data_len <= nvme_tcp_inline_data_size(req);
 }

 static inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
 {
 	return req->iter.bvec->bv_page;
@@ -2370,11 +2372,11 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
 	c->common.flags |= NVME_CMD_SGL_METABUF;

 	if (!blk_rq_nr_phys_segments(rq))
 		nvme_tcp_set_sg_null(c);
 	else if (rq_data_dir(rq) == WRITE &&
-	    req->data_len <= nvme_tcp_inline_data_size(queue))
+	    req->data_len <= nvme_tcp_inline_data_size(req))
 		nvme_tcp_set_sg_inline(queue, c, req->data_len);
 	else
 		nvme_tcp_set_sg_host_data(c, req->data_len);

 	return 0;
@@ -2405,11 +2407,11 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 	req->curr_bio = rq->bio;
 	if (req->curr_bio && req->data_len)
 		nvme_tcp_init_iter(req, rq_data_dir(rq));

 	if (rq_data_dir(rq) == WRITE &&
-	    req->data_len <= nvme_tcp_inline_data_size(queue))
+	    req->data_len <= nvme_tcp_inline_data_size(req))
 		req->pdu_len = req->data_len;

 	pdu->hdr.type = nvme_tcp_cmd;
 	pdu->hdr.flags = 0;
 	if (queue->hdr_digest)
--
2.25.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-tcp: use in-capsule data for I/O connect
  2022-07-07 21:12   ` [PATCH v2] nvme-tcp: use in-capsule data for I/O connect Caleb Sander
@ 2022-07-08  5:00     ` Christoph Hellwig
  2022-07-08 15:56       ` Caleb Sander
  2022-07-12 15:35     ` Christoph Hellwig
  1 sibling, 1 reply; 8+ messages in thread
From: Christoph Hellwig @ 2022-07-08  5:00 UTC (permalink / raw)
  To: Caleb Sander
  Cc: Sagi Grimberg, Keith Busch, Jens Axboe, Christoph Hellwig,
	linux-nvme

On Thu, Jul 07, 2022 at 03:12:45PM -0600, Caleb Sander wrote:
> > From the NVMe/TCP spec:
> > The maximum amount of in-capsule data for Fabrics and Admin Commands
> > is 8,192 bytes ... NVMe/TCP controllers must support in-capsule data
> > for Fabrics and Admin Command Capsules
> 
> Currently, command data is only sent in-capsule on the admin queue
> or I/O queues that indicate support for it.
> Send fabrics command data in-capsule for I/O queues too to avoid
> needing a separate H2CData PDU for the connect command.

While I'm fine with this change, can you please state here why this
is important?  Is there a use case where it really matters?  A controller
that is unhappy if this doesn't happen?


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-tcp: use in-capsule data for I/O connect
  2022-07-08  5:00     ` Christoph Hellwig
@ 2022-07-08 15:56       ` Caleb Sander
  2022-07-10 11:05         ` Sagi Grimberg
  0 siblings, 1 reply; 8+ messages in thread
From: Caleb Sander @ 2022-07-08 15:56 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Sagi Grimberg, Keith Busch, Jens Axboe, linux-nvme

> While I'm fine with this change, can you please state here why this
> is important?  Is there a use case where it really matters?  A controller
> that is unhappy if this doesn't happen?

It's just an optimization. Without this change, we send the connect command
capsule and data in separate PDUs (CapsuleCmd and H2CData), and must wait for
the controller to respond with an R2T PDU before sending the H2CData.
With the change, we send a single CapsuleCmd PDU that includes the data.
This reduces the number of bytes (and likely packets) sent across the network,
and simplifies the send state machine handling in the driver.

Using in-capsule data does not "really matter" for admin commands either,
but we appear to have decided that the optimization is worth it.
So I am just suggesting we extend the logic to the I/O connect command.


On Thu, Jul 7, 2022 at 10:00 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Thu, Jul 07, 2022 at 03:12:45PM -0600, Caleb Sander wrote:
> > > From the NVMe/TCP spec:
> > > The maximum amount of in-capsule data for Fabrics and Admin Commands
> > > is 8,192 bytes ... NVMe/TCP controllers must support in-capsule data
> > > for Fabrics and Admin Command Capsules
> >
> > Currently, command data is only sent in-capsule on the admin queue
> > or I/O queues that indicate support for it.
> > Send fabrics command data in-capsule for I/O queues too to avoid
> > needing a separate H2CData PDU for the connect command.
>
> While I'm fine with this change, can you please state here why this
> is important?  Is there a use case where it really matters?  A controller
> that is unhappy if this doesn't happen?


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-tcp: use in-capsule data for I/O connect
  2022-07-08 15:56       ` Caleb Sander
@ 2022-07-10 11:05         ` Sagi Grimberg
  2022-07-11 19:18           ` Caleb Sander
  0 siblings, 1 reply; 8+ messages in thread
From: Sagi Grimberg @ 2022-07-10 11:05 UTC (permalink / raw)
  To: Caleb Sander, Christoph Hellwig; +Cc: Keith Busch, Jens Axboe, linux-nvme


>> While I'm fine with this change, can you please state here why this
>> is important?  Is there a use case where it really matters?  A controller
>> that is unhappy if this doesn't happen?
> 
> It's just an optimization. Without this change, we send the connect command
> capsule and data in separate PDUs (CapsuleCmd and H2CData), and must wait for
> the controller to respond with an R2T PDU before sending the H2CData.
> With the change, we send a single CapsuleCmd PDU that includes the data.
> This reduces the number of bytes (and likely packets) sent across the network,
> and simplifies the send state machine handling in the driver.
> 
> Using in-capsule data does not "really matter" for admin commands either,
> but we appear to have decided that the optimization is worth it.
> So I am just suggesting we extend the logic to the I/O connect command.

I think that the question was, does a controller that suffers from this
inefficiency exist? Up until now the controllers seen in the wild all
support ioccsz that accepts IO connect to go in-capsule.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-tcp: use in-capsule data for I/O connect
  2022-07-10 11:05         ` Sagi Grimberg
@ 2022-07-11 19:18           ` Caleb Sander
  0 siblings, 0 replies; 8+ messages in thread
From: Caleb Sander @ 2022-07-11 19:18 UTC (permalink / raw)
  Cc: Christoph Hellwig, Keith Busch, Jens Axboe, linux-nvme

> I think that the question was, does a controller that suffers from this
> inefficiency exist? Up until now the controllers seen in the wild all
> support ioccsz that accepts IO connect to go in-capsule.

No, I don't know of a controller that reports IOCCSZ < 64 + 1024.
I discovered this behavior when writing a mock nvme-tcp target
reporting IOCCSZ = 64.

On Sun, Jul 10, 2022 at 4:05 AM Sagi Grimberg <sagi@grimberg.me> wrote:
>
>
> >> While I'm fine with this change, can you please state here why this
> >> is important?  Is there a use case where it really matters?  A controller
> >> that is unhappy if this doesn't happen?
> >
> > It's just an optimization. Without this change, we send the connect command
> > capsule and data in separate PDUs (CapsuleCmd and H2CData), and must wait for
> > the controller to respond with an R2T PDU before sending the H2CData.
> > With the change, we send a single CapsuleCmd PDU that includes the data.
> > This reduces the number of bytes (and likely packets) sent across the network,
> > and simplifies the send state machine handling in the driver.
> >
> > Using in-capsule data does not "really matter" for admin commands either,
> > but we appear to have decided that the optimization is worth it.
> > So I am just suggesting we extend the logic to the I/O connect command.
>
> I think that the question was, does a controller that suffers from this
> inefficiency exist? Up until now the controllers seen in the wild all
> support ioccsz that accepts IO connect to go in-capsule.


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] nvme-tcp: use in-capsule data for I/O connect
  2022-07-07 21:12   ` [PATCH v2] nvme-tcp: use in-capsule data for I/O connect Caleb Sander
  2022-07-08  5:00     ` Christoph Hellwig
@ 2022-07-12 15:35     ` Christoph Hellwig
  1 sibling, 0 replies; 8+ messages in thread
From: Christoph Hellwig @ 2022-07-12 15:35 UTC (permalink / raw)
  To: Caleb Sander
  Cc: Sagi Grimberg, Keith Busch, Jens Axboe, Christoph Hellwig,
	linux-nvme

Thanks,

applied to nvme-5.20 with an updated commit log based on the reply
in this thread.


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2022-07-12 15:35 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-06-16 14:49 [PATCH] nvme/tcp: use in-capsule data for fabrics commands Caleb Sander
2022-07-07  6:29 ` Sagi Grimberg
2022-07-07 21:12   ` [PATCH v2] nvme-tcp: use in-capsule data for I/O connect Caleb Sander
2022-07-08  5:00     ` Christoph Hellwig
2022-07-08 15:56       ` Caleb Sander
2022-07-10 11:05         ` Sagi Grimberg
2022-07-11 19:18           ` Caleb Sander
2022-07-12 15:35     ` Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox