* [PATCH V2 0/2] nvmet-tcp: fix receive path error handling and state machine
@ 2026-03-12 13:40 Maurizio Lombardi
  2026-03-12 13:40 ` [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers Maurizio Lombardi
  2026-03-12 13:40 ` [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error() Maurizio Lombardi
  0 siblings, 2 replies; 10+ messages in thread
From: Maurizio Lombardi @ 2026-03-12 13:40 UTC
To: kbusch; +Cc: linux-nvme, dwagner, yjshin0438, hare, sagi, chaitanyak, mlombard

Patch 1 fixes a potential issue where network data could be read into
an uninitialized iterator. Currently, nvmet_tcp_build_pdu_iovec()
returns void, so its callers cannot tell whether an out-of-bounds PDU
length or offset triggered an early return. Consequently, callers
blindly overwrite the queue state to NVMET_TCP_RECV_DATA. This patch
modifies the function to return an error code, shifting the handling
responsibility to the callers to ensure proper socket teardown.

Patch 2 cleans up redundant, localized calls to nvmet_tcp_fatal_error()
scattered across the receive path. It delegates the responsibility of
executing the fatal error function to the top-level caller by bubbling
up the error codes.

V2: some cosmetic changes to Patch 1, no functional changes.

Maurizio Lombardi (2):
  nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers
  nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error()

 drivers/nvme/target/tcp.c | 73 ++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 39 deletions(-)

-- 
2.53.0
* [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers
  2026-03-12 13:40 [PATCH V2 0/2] nvmet-tcp: fix receive path error handling and state machine Maurizio Lombardi
@ 2026-03-12 13:40 ` Maurizio Lombardi
  2026-03-13  7:10   ` Hannes Reinecke
  2026-03-12 13:40 ` [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error() Maurizio Lombardi
  1 sibling, 1 reply; 10+ messages in thread
From: Maurizio Lombardi @ 2026-03-12 13:40 UTC
To: kbusch; +Cc: linux-nvme, dwagner, yjshin0438, hare, sagi, chaitanyak, mlombard

Currently, when nvmet_tcp_build_pdu_iovec() detects an out-of-bounds
PDU length or offset, it triggers nvmet_tcp_fatal_error(cmd->queue)
and returns early. However, because the function returns void, the
callers are entirely unaware that a fatal error has occurred and
that the cmd->recv_msg.msg_iter was left uninitialized.

Callers such as nvmet_tcp_handle_h2c_data_pdu() proceed to blindly
overwrite the queue state with queue->rcv_state = NVMET_TCP_RECV_DATA.
Consequently, the socket receiving loop may attempt to read incoming
network data into the uninitialized iterator.

Fix this by shifting the error handling responsibility to the callers.
Fixes: 52a0a9854934 ("nvmet-tcp: add bounds checks in nvmet_tcp_build_pdu_iovec")
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
---
 drivers/nvme/target/tcp.c | 51 ++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index acc71a26733f..1fbf12df1183 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -351,7 +351,7 @@ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd)
 
 static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue);
 
-static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
+static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
 {
 	struct bio_vec *iov = cmd->iov;
 	struct scatterlist *sg;
@@ -364,22 +364,19 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
 	offset = cmd->rbytes_done;
 	cmd->sg_idx = offset / PAGE_SIZE;
 	sg_offset = offset % PAGE_SIZE;
-	if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) {
-		nvmet_tcp_fatal_error(cmd->queue);
-		return;
-	}
+	if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt)
+		return -EPROTO;
+
 	sg = &cmd->req.sg[cmd->sg_idx];
 	sg_remaining = cmd->req.sg_cnt - cmd->sg_idx;
 
 	while (length) {
-		if (!sg_remaining) {
-			nvmet_tcp_fatal_error(cmd->queue);
-			return;
-		}
-		if (!sg->length || sg->length <= sg_offset) {
-			nvmet_tcp_fatal_error(cmd->queue);
-			return;
-		}
+		if (!sg_remaining)
+			return -EPROTO;
+
+		if (!sg->length || sg->length <= sg_offset)
+			return -EPROTO;
+
 		u32 iov_len = min_t(u32, length, sg->length - sg_offset);
 
 		bvec_set_page(iov, sg_page(sg), iov_len,
@@ -394,6 +391,7 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd)
 
 	iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov,
 			nr_pages, cmd->pdu_len);
+	return 0;
 }
 
 static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
@@ -931,7 +929,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
 	return 0;
 }
 
-static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
+static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
 		struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
 {
 	size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
@@ -947,19 +945,23 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
 	if (!nvme_is_write(cmd->req.cmd) || !data_len ||
 	    data_len > cmd->req.port->inline_data_size) {
 		nvmet_prepare_receive_pdu(queue);
-		return;
+		return 0;
 	}
 
 	ret = nvmet_tcp_map_data(cmd);
 	if (unlikely(ret)) {
 		pr_err("queue %d: failed to map data\n", queue->idx);
 		nvmet_tcp_fatal_error(queue);
-		return;
+		return -EPROTO;
 	}
 
 	queue->rcv_state = NVMET_TCP_RECV_DATA;
-	nvmet_tcp_build_pdu_iovec(cmd);
 	cmd->flags |= NVMET_TCP_F_INIT_FAILED;
+	ret = nvmet_tcp_build_pdu_iovec(cmd);
+	if (unlikely(ret))
+		pr_err("queue %d: failed to build PDU iovec\n", queue->idx);
+
+	return ret;
 }
 
 static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
@@ -1011,7 +1013,10 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
 		goto err_proto;
 	}
 	cmd->pdu_recv = 0;
-	nvmet_tcp_build_pdu_iovec(cmd);
+	if (unlikely(nvmet_tcp_build_pdu_iovec(cmd))) {
+		pr_err("queue %d: failed to build PDU iovec\n", queue->idx);
+		goto err_proto;
+	}
 	queue->cmd = cmd;
 	queue->rcv_state = NVMET_TCP_RECV_DATA;
 
@@ -1074,8 +1079,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 			le32_to_cpu(req->cmd->common.dptr.sgl.length),
 			le16_to_cpu(req->cqe->status));
 
-		nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
-		return 0;
+		return nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
 	}
 
 	ret = nvmet_tcp_map_data(queue->cmd);
@@ -1092,8 +1096,11 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 	if (nvmet_tcp_need_data_in(queue->cmd)) {
 		if (nvmet_tcp_has_inline_data(queue->cmd)) {
 			queue->rcv_state = NVMET_TCP_RECV_DATA;
-			nvmet_tcp_build_pdu_iovec(queue->cmd);
-			return 0;
+			ret = nvmet_tcp_build_pdu_iovec(queue->cmd);
+			if (unlikely(ret))
+				pr_err("queue %d: failed to build PDU iovec\n",
+				       queue->idx);
+			return ret;
 		}
 		/* send back R2T */
 		nvmet_tcp_queue_response(&queue->cmd->req);
-- 
2.53.0
* Re: [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers 2026-03-12 13:40 ` [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers Maurizio Lombardi @ 2026-03-13 7:10 ` Hannes Reinecke 2026-03-13 7:45 ` Maurizio Lombardi 0 siblings, 1 reply; 10+ messages in thread From: Hannes Reinecke @ 2026-03-13 7:10 UTC (permalink / raw) To: Maurizio Lombardi, kbusch Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak, mlombard On 3/12/26 14:40, Maurizio Lombardi wrote: > Currently, when nvmet_tcp_build_pdu_iovec() detects an out-of-bounds > PDU length or offset, it triggers nvmet_tcp_fatal_error(cmd->queue) > and returns early. However, because the function returns void, the > callers are entirely unaware that a fatal error has occurred and > that the cmd->recv_msg.msg_iter was left uninitialized. > > Callers such as nvmet_tcp_handle_h2c_data_pdu() proceed to blindly > overwrite the queue state with queue->rcv_state = NVMET_TCP_RECV_DATA > Consequently, the socket receiving loop may attempt to read incoming > network data into the uninitialized iterator. > > Fix this by shifting the error handling responsibility to the callers. 
> > Fixes: 52a0a9854934 ("nvmet-tcp: add bounds checks in nvmet_tcp_build_pdu_iovec") > Signed-off-by: Maurizio Lombardi <mlombard@redhat.com> > --- > drivers/nvme/target/tcp.c | 51 ++++++++++++++++++++++----------------- > 1 file changed, 29 insertions(+), 22 deletions(-) > > diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c > index acc71a26733f..1fbf12df1183 100644 > --- a/drivers/nvme/target/tcp.c > +++ b/drivers/nvme/target/tcp.c > @@ -351,7 +351,7 @@ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) > > static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue); > > -static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) > +static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) > { > struct bio_vec *iov = cmd->iov; > struct scatterlist *sg; > @@ -364,22 +364,19 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) > offset = cmd->rbytes_done; > cmd->sg_idx = offset / PAGE_SIZE; > sg_offset = offset % PAGE_SIZE; > - if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) { > - nvmet_tcp_fatal_error(cmd->queue); > - return; > - } > + if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) > + return -EPROTO; > + > sg = &cmd->req.sg[cmd->sg_idx]; > sg_remaining = cmd->req.sg_cnt - cmd->sg_idx; > > while (length) { > - if (!sg_remaining) { > - nvmet_tcp_fatal_error(cmd->queue); > - return; > - } > - if (!sg->length || sg->length <= sg_offset) { > - nvmet_tcp_fatal_error(cmd->queue); > - return; > - } > + if (!sg_remaining) > + return -EPROTO; > + > + if (!sg->length || sg->length <= sg_offset) > + return -EPROTO; > + > u32 iov_len = min_t(u32, length, sg->length - sg_offset); > > bvec_set_page(iov, sg_page(sg), iov_len, > @@ -394,6 +391,7 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) > > iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov, > nr_pages, cmd->pdu_len); > + return 0; > } > > static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) > @@ -931,7 
+929,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) > return 0; > } > > -static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, > +static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, > struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) > { > size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); > @@ -947,19 +945,23 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, > if (!nvme_is_write(cmd->req.cmd) || !data_len || > data_len > cmd->req.port->inline_data_size) { > nvmet_prepare_receive_pdu(queue); > - return; > + return 0; > } > > ret = nvmet_tcp_map_data(cmd); > if (unlikely(ret)) { > pr_err("queue %d: failed to map data\n", queue->idx); > nvmet_tcp_fatal_error(queue); > - return; > + return -EPROTO; > } > > queue->rcv_state = NVMET_TCP_RECV_DATA; > - nvmet_tcp_build_pdu_iovec(cmd); > cmd->flags |= NVMET_TCP_F_INIT_FAILED; > + ret = nvmet_tcp_build_pdu_iovec(cmd); > + if (unlikely(ret)) > + pr_err("queue %d: failed to build PDU iovec\n", queue->idx); Why don't we call 'nvmet_tcp_fatal_error()' here? The original code did ... 
> + > + return ret; > } > > static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) > @@ -1011,7 +1013,10 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) > goto err_proto; > } > cmd->pdu_recv = 0; > - nvmet_tcp_build_pdu_iovec(cmd); > + if (unlikely(nvmet_tcp_build_pdu_iovec(cmd))) { > + pr_err("queue %d: failed to build PDU iovec\n", queue->idx); > + goto err_proto; > + } > queue->cmd = cmd; > queue->rcv_state = NVMET_TCP_RECV_DATA; > > @@ -1074,8 +1079,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) > le32_to_cpu(req->cmd->common.dptr.sgl.length), > le16_to_cpu(req->cqe->status)); > > - nvmet_tcp_handle_req_failure(queue, queue->cmd, req); > - return 0; > + return nvmet_tcp_handle_req_failure(queue, queue->cmd, req); > } > > ret = nvmet_tcp_map_data(queue->cmd); > @@ -1092,8 +1096,11 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) > if (nvmet_tcp_need_data_in(queue->cmd)) { > if (nvmet_tcp_has_inline_data(queue->cmd)) { > queue->rcv_state = NVMET_TCP_RECV_DATA; > - nvmet_tcp_build_pdu_iovec(queue->cmd); > - return 0; > + ret = nvmet_tcp_build_pdu_iovec(queue->cmd); > + if (unlikely(ret)) > + pr_err("queue %d: failed to build PDU iovec\n", > + queue->idx); > + return ret; > } > /* send back R2T */ > nvmet_tcp_queue_response(&queue->cmd->req); Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers 2026-03-13 7:10 ` Hannes Reinecke @ 2026-03-13 7:45 ` Maurizio Lombardi 2026-03-13 8:11 ` Hannes Reinecke 0 siblings, 1 reply; 10+ messages in thread From: Maurizio Lombardi @ 2026-03-13 7:45 UTC (permalink / raw) To: Hannes Reinecke, Maurizio Lombardi, kbusch Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak, mlombard On Fri Mar 13, 2026 at 8:10 AM CET, Hannes Reinecke wrote: > On 3/12/26 14:40, Maurizio Lombardi wrote: >> Currently, when nvmet_tcp_build_pdu_iovec() detects an out-of-bounds >> PDU length or offset, it triggers nvmet_tcp_fatal_error(cmd->queue) >> and returns early. However, because the function returns void, the >> callers are entirely unaware that a fatal error has occurred and >> that the cmd->recv_msg.msg_iter was left uninitialized. >> >> Callers such as nvmet_tcp_handle_h2c_data_pdu() proceed to blindly >> overwrite the queue state with queue->rcv_state = NVMET_TCP_RECV_DATA >> Consequently, the socket receiving loop may attempt to read incoming >> network data into the uninitialized iterator. >> >> Fix this by shifting the error handling responsibility to the callers. 
>> >> Fixes: 52a0a9854934 ("nvmet-tcp: add bounds checks in nvmet_tcp_build_pdu_iovec") >> Signed-off-by: Maurizio Lombardi <mlombard@redhat.com> >> --- >> drivers/nvme/target/tcp.c | 51 ++++++++++++++++++++++----------------- >> 1 file changed, 29 insertions(+), 22 deletions(-) >> >> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >> index acc71a26733f..1fbf12df1183 100644 >> --- a/drivers/nvme/target/tcp.c >> +++ b/drivers/nvme/target/tcp.c >> @@ -351,7 +351,7 @@ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) >> >> static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue); >> >> -static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >> +static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >> { >> struct bio_vec *iov = cmd->iov; >> struct scatterlist *sg; >> @@ -364,22 +364,19 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >> offset = cmd->rbytes_done; >> cmd->sg_idx = offset / PAGE_SIZE; >> sg_offset = offset % PAGE_SIZE; >> - if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) { >> - nvmet_tcp_fatal_error(cmd->queue); >> - return; >> - } >> + if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) >> + return -EPROTO; >> + >> sg = &cmd->req.sg[cmd->sg_idx]; >> sg_remaining = cmd->req.sg_cnt - cmd->sg_idx; >> >> while (length) { >> - if (!sg_remaining) { >> - nvmet_tcp_fatal_error(cmd->queue); >> - return; >> - } >> - if (!sg->length || sg->length <= sg_offset) { >> - nvmet_tcp_fatal_error(cmd->queue); >> - return; >> - } >> + if (!sg_remaining) >> + return -EPROTO; >> + >> + if (!sg->length || sg->length <= sg_offset) >> + return -EPROTO; >> + >> u32 iov_len = min_t(u32, length, sg->length - sg_offset); >> >> bvec_set_page(iov, sg_page(sg), iov_len, >> @@ -394,6 +391,7 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >> >> iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov, >> nr_pages, cmd->pdu_len); >> + return 0; >> } >> >> static void 
nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
>> @@ -931,7 +929,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
>>  	return 0;
>>  }
>>  
>> -static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
>> +static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
>>  		struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
>>  {
>>  	size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);
>> @@ -947,19 +945,23 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
>>  	if (!nvme_is_write(cmd->req.cmd) || !data_len ||
>>  	    data_len > cmd->req.port->inline_data_size) {
>>  		nvmet_prepare_receive_pdu(queue);
>> -		return;
>> +		return 0;
>>  	}
>>  
>>  	ret = nvmet_tcp_map_data(cmd);
>>  	if (unlikely(ret)) {
>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>>  		nvmet_tcp_fatal_error(queue);
>> -		return;
>> +		return -EPROTO;
>>  	}
>>  
>>  	queue->rcv_state = NVMET_TCP_RECV_DATA;
>> -	nvmet_tcp_build_pdu_iovec(cmd);
>>  	cmd->flags |= NVMET_TCP_F_INIT_FAILED;
>> +	ret = nvmet_tcp_build_pdu_iovec(cmd);
>> +	if (unlikely(ret))
>> +		pr_err("queue %d: failed to build PDU iovec\n", queue->idx);
>
> Why don't we call 'nvmet_tcp_fatal_error()' here?
> The original code did ...

We don't need to, the error code is propagated up to
nvmet_tcp_done_recv_pdu(), then up to nvmet_tcp_try_recv_pdu(),
and then up to nvmet_tcp_try_recv_one(). Finally, it reaches
nvmet_tcp_try_recv(), which checks the error code and calls
nvmet_tcp_socket_error(); because the error code is -EPROTO,
nvmet_tcp_fatal_error() will be called.

Maurizio
* Re: [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers 2026-03-13 7:45 ` Maurizio Lombardi @ 2026-03-13 8:11 ` Hannes Reinecke 0 siblings, 0 replies; 10+ messages in thread From: Hannes Reinecke @ 2026-03-13 8:11 UTC (permalink / raw) To: Maurizio Lombardi, Maurizio Lombardi, kbusch Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak On 3/13/26 08:45, Maurizio Lombardi wrote: > On Fri Mar 13, 2026 at 8:10 AM CET, Hannes Reinecke wrote: >> On 3/12/26 14:40, Maurizio Lombardi wrote: >>> Currently, when nvmet_tcp_build_pdu_iovec() detects an out-of-bounds >>> PDU length or offset, it triggers nvmet_tcp_fatal_error(cmd->queue) >>> and returns early. However, because the function returns void, the >>> callers are entirely unaware that a fatal error has occurred and >>> that the cmd->recv_msg.msg_iter was left uninitialized. >>> >>> Callers such as nvmet_tcp_handle_h2c_data_pdu() proceed to blindly >>> overwrite the queue state with queue->rcv_state = NVMET_TCP_RECV_DATA >>> Consequently, the socket receiving loop may attempt to read incoming >>> network data into the uninitialized iterator. >>> >>> Fix this by shifting the error handling responsibility to the callers. 
>>> >>> Fixes: 52a0a9854934 ("nvmet-tcp: add bounds checks in nvmet_tcp_build_pdu_iovec") >>> Signed-off-by: Maurizio Lombardi <mlombard@redhat.com> >>> --- >>> drivers/nvme/target/tcp.c | 51 ++++++++++++++++++++++----------------- >>> 1 file changed, 29 insertions(+), 22 deletions(-) >>> >>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c >>> index acc71a26733f..1fbf12df1183 100644 >>> --- a/drivers/nvme/target/tcp.c >>> +++ b/drivers/nvme/target/tcp.c >>> @@ -351,7 +351,7 @@ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) >>> >>> static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue); >>> >>> -static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >>> +static int nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >>> { >>> struct bio_vec *iov = cmd->iov; >>> struct scatterlist *sg; >>> @@ -364,22 +364,19 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >>> offset = cmd->rbytes_done; >>> cmd->sg_idx = offset / PAGE_SIZE; >>> sg_offset = offset % PAGE_SIZE; >>> - if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) { >>> - nvmet_tcp_fatal_error(cmd->queue); >>> - return; >>> - } >>> + if (!cmd->req.sg_cnt || cmd->sg_idx >= cmd->req.sg_cnt) >>> + return -EPROTO; >>> + >>> sg = &cmd->req.sg[cmd->sg_idx]; >>> sg_remaining = cmd->req.sg_cnt - cmd->sg_idx; >>> >>> while (length) { >>> - if (!sg_remaining) { >>> - nvmet_tcp_fatal_error(cmd->queue); >>> - return; >>> - } >>> - if (!sg->length || sg->length <= sg_offset) { >>> - nvmet_tcp_fatal_error(cmd->queue); >>> - return; >>> - } >>> + if (!sg_remaining) >>> + return -EPROTO; >>> + >>> + if (!sg->length || sg->length <= sg_offset) >>> + return -EPROTO; >>> + >>> u32 iov_len = min_t(u32, length, sg->length - sg_offset); >>> >>> bvec_set_page(iov, sg_page(sg), iov_len, >>> @@ -394,6 +391,7 @@ static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) >>> >>> iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov, >>> nr_pages, 
cmd->pdu_len); >>> + return 0; >>> } >>> >>> static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) >>> @@ -931,7 +929,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) >>> return 0; >>> } >>> >>> -static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, >>> +static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, >>> struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) >>> { >>> size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); >>> @@ -947,19 +945,23 @@ static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, >>> if (!nvme_is_write(cmd->req.cmd) || !data_len || >>> data_len > cmd->req.port->inline_data_size) { >>> nvmet_prepare_receive_pdu(queue); >>> - return; >>> + return 0; >>> } >>> >>> ret = nvmet_tcp_map_data(cmd); >>> if (unlikely(ret)) { >>> pr_err("queue %d: failed to map data\n", queue->idx); >>> nvmet_tcp_fatal_error(queue); >>> - return; >>> + return -EPROTO; >>> } >>> >>> queue->rcv_state = NVMET_TCP_RECV_DATA; >>> - nvmet_tcp_build_pdu_iovec(cmd); >>> cmd->flags |= NVMET_TCP_F_INIT_FAILED; >>> + ret = nvmet_tcp_build_pdu_iovec(cmd); >>> + if (unlikely(ret)) >>> + pr_err("queue %d: failed to build PDU iovec\n", queue->idx); >> >> Why don't we call 'nvmet_tcp_fatal_error()' here? >> The original code did ... > > We don't need too, the error code is propagated up to > nvmet_tcp_done_recv_pdu, then up to nvmet_tcp_try_recv_pdu() > and then up to nvmet_tcp_try_recv_one(). Finally, it reaches > nvmet_tcp_try_recv() that checks the error code and calls > nvmet_tcp_socket_error(), because the error code is -EPROTO > nvmet_tcp_fatal_error() will be called. > Ah, got it. Reviewed-by: Hannes Reinecke <hare@suse.de> Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. 
Knoblich
* [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error()
  2026-03-12 13:40 [PATCH V2 0/2] nvmet-tcp: fix receive path error handling and state machine Maurizio Lombardi
  2026-03-12 13:40 ` [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers Maurizio Lombardi
@ 2026-03-12 13:40 ` Maurizio Lombardi
  2026-03-13  7:20   ` Hannes Reinecke
  1 sibling, 1 reply; 10+ messages in thread
From: Maurizio Lombardi @ 2026-03-12 13:40 UTC
To: kbusch; +Cc: linux-nvme, dwagner, yjshin0438, hare, sagi, chaitanyak, mlombard

Executing nvmet_tcp_fatal_error() is generally the responsibility
of the caller (nvmet_tcp_try_recv); all other functions should
just return the error code.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
---
 drivers/nvme/target/tcp.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 1fbf12df1183..b250b13854b4 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -885,7 +885,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
 	if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
 		pr_err("bad nvme-tcp pdu length (%d)\n",
 			le32_to_cpu(icreq->hdr.plen));
-		nvmet_tcp_fatal_error(queue);
 		return -EPROTO;
 	}
 
@@ -951,7 +950,6 @@ static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
 	ret = nvmet_tcp_map_data(cmd);
 	if (unlikely(ret)) {
 		pr_err("queue %d: failed to map data\n", queue->idx);
-		nvmet_tcp_fatal_error(queue);
 		return -EPROTO;
 	}
 
@@ -1024,7 +1022,6 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
 
 err_proto:
 	/* FIXME: use proper transport errors */
-	nvmet_tcp_fatal_error(queue);
 	return -EPROTO;
 }
 
@@ -1039,7 +1036,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 		if (hdr->type != nvme_tcp_icreq) {
 			pr_err("unexpected pdu type (%d) before icreq\n",
 				hdr->type);
-			nvmet_tcp_fatal_error(queue);
 			return -EPROTO;
 		}
 		return nvmet_tcp_handle_icreq(queue);
@@ -1048,7 +1044,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 	if (unlikely(hdr->type == nvme_tcp_icreq)) {
 		pr_err("queue %d: received icreq pdu in state %d\n",
 			queue->idx, queue->state);
-		nvmet_tcp_fatal_error(queue);
 		return -EPROTO;
 	}
 
@@ -1065,7 +1060,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 		pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
 			queue->idx, queue->nr_cmds, queue->send_list_len,
 			nvme_cmd->common.opcode);
-		nvmet_tcp_fatal_error(queue);
 		return -ENOMEM;
 	}
 
@@ -1086,9 +1080,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
 	if (unlikely(ret)) {
 		pr_err("queue %d: failed to map data\n", queue->idx);
 		if (nvmet_tcp_has_inline_data(queue->cmd))
-			nvmet_tcp_fatal_error(queue);
-		else
-			nvmet_req_complete(req, ret);
+			return -EPROTO;
+
+		nvmet_req_complete(req, ret);
 		ret = -EAGAIN;
 		goto out;
 	}
@@ -1211,7 +1205,6 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
 
 	if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
 		pr_err("unexpected pdu type %d\n", hdr->type);
-		nvmet_tcp_fatal_error(queue);
 		return -EIO;
 	}
 
@@ -1225,16 +1218,12 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
 	}
 
 	if (queue->hdr_digest &&
-	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
-		nvmet_tcp_fatal_error(queue); /* fatal */
+	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen))
 		return -EPROTO;
-	}
 
 	if (queue->data_digest &&
-	    nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
-		nvmet_tcp_fatal_error(queue); /* fatal */
+	    nvmet_tcp_check_ddgst(queue, &queue->pdu))
 		return -EPROTO;
-	}
 
 	return nvmet_tcp_done_recv_pdu(queue);
 }
@@ -1319,7 +1308,6 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
 			le32_to_cpu(cmd->exp_ddgst));
 		nvmet_req_uninit(&cmd->req);
 		nvmet_tcp_free_cmd_buffers(cmd);
-		nvmet_tcp_fatal_error(queue);
 		ret = -EPROTO;
 		goto out;
 	}
-- 
2.53.0
* Re: [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error() 2026-03-12 13:40 ` [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error() Maurizio Lombardi @ 2026-03-13 7:20 ` Hannes Reinecke 2026-03-13 7:51 ` Maurizio Lombardi 0 siblings, 1 reply; 10+ messages in thread From: Hannes Reinecke @ 2026-03-13 7:20 UTC (permalink / raw) To: Maurizio Lombardi, kbusch Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak, mlombard On 3/12/26 14:40, Maurizio Lombardi wrote: > Executing nvmet_tcp_fatal_error() is generally the responsibility > of the caller (nvmet_tcp_try_recv); all other functions should > just return the error code. > > Signed-off-by: Maurizio Lombardi <mlombard@redhat.com> > --- > drivers/nvme/target/tcp.c | 22 +++++----------------- > 1 file changed, 5 insertions(+), 17 deletions(-) > > diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c > index 1fbf12df1183..b250b13854b4 100644 > --- a/drivers/nvme/target/tcp.c > +++ b/drivers/nvme/target/tcp.c > @@ -885,7 +885,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) > if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) { > pr_err("bad nvme-tcp pdu length (%d)\n", > le32_to_cpu(icreq->hdr.plen)); > - nvmet_tcp_fatal_error(queue); > return -EPROTO; > } > > @@ -951,7 +950,6 @@ static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, > ret = nvmet_tcp_map_data(cmd); > if (unlikely(ret)) { > pr_err("queue %d: failed to map data\n", queue->idx); > - nvmet_tcp_fatal_error(queue); > return -EPROTO; > } > > @@ -1024,7 +1022,6 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) > > err_proto: > /* FIXME: use proper transport errors */ > - nvmet_tcp_fatal_error(queue); > return -EPROTO; > } > > @@ -1039,7 +1036,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) > if (hdr->type != nvme_tcp_icreq) { > pr_err("unexpected pdu type (%d) before icreq\n", > hdr->type); > - 
nvmet_tcp_fatal_error(queue); > return -EPROTO; > } > return nvmet_tcp_handle_icreq(queue); > @@ -1048,7 +1044,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) > if (unlikely(hdr->type == nvme_tcp_icreq)) { > pr_err("queue %d: received icreq pdu in state %d\n", > queue->idx, queue->state); > - nvmet_tcp_fatal_error(queue); > return -EPROTO; > } > > @@ -1065,7 +1060,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) > pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d", > queue->idx, queue->nr_cmds, queue->send_list_len, > nvme_cmd->common.opcode); > - nvmet_tcp_fatal_error(queue); > return -ENOMEM; > } > > @@ -1086,9 +1080,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) > if (unlikely(ret)) { > pr_err("queue %d: failed to map data\n", queue->idx); > if (nvmet_tcp_has_inline_data(queue->cmd)) > - nvmet_tcp_fatal_error(queue); > - else > - nvmet_req_complete(req, ret); > + return -EPROTO; > + > + nvmet_req_complete(req, ret); > ret = -EAGAIN; > goto out; > } > @@ -1211,7 +1205,6 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) > > if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) { > pr_err("unexpected pdu type %d\n", hdr->type); > - nvmet_tcp_fatal_error(queue); > return -EIO; > } > > @@ -1225,16 +1218,12 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue) > } > > if (queue->hdr_digest && > - nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { > - nvmet_tcp_fatal_error(queue); /* fatal */ > + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) > return -EPROTO; > - } > > if (queue->data_digest && > - nvmet_tcp_check_ddgst(queue, &queue->pdu)) { > - nvmet_tcp_fatal_error(queue); /* fatal */ > + nvmet_tcp_check_ddgst(queue, &queue->pdu)) > return -EPROTO; > - } > > return nvmet_tcp_done_recv_pdu(queue); > } > @@ -1319,7 +1308,6 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) > le32_to_cpu(cmd->exp_ddgst)); > 
nvmet_req_uninit(&cmd->req);
>  		nvmet_tcp_free_cmd_buffers(cmd);
> -		nvmet_tcp_fatal_error(queue);
>  		ret = -EPROTO;
>  		goto out;
>  	}

I seem to be missing something crucial here. But after applying this
patch _all_ invocations to nvmet_tcp_fatal_error() are gone.
(Applying the patches on top of linus tree with topmost commit
0257f64bdac7fdca30fa3cae0df8b9ecbec7733a)

I must be missing something crucial here ...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                  Kernel Storage Architect
hare@suse.de                                +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich
* Re: [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error()
  2026-03-13  7:20 ` Hannes Reinecke
@ 2026-03-13  7:51 ` Maurizio Lombardi
  2026-03-13  8:11   ` Hannes Reinecke
  0 siblings, 1 reply; 10+ messages in thread
From: Maurizio Lombardi @ 2026-03-13 7:51 UTC (permalink / raw)
  To: Hannes Reinecke, Maurizio Lombardi, kbusch
  Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak, mlombard

On Fri Mar 13, 2026 at 8:20 AM CET, Hannes Reinecke wrote:
> On 3/12/26 14:40, Maurizio Lombardi wrote:
>> Executing nvmet_tcp_fatal_error() is generally the responsibility
>> of the caller (nvmet_tcp_try_recv); all other functions should
>> just return the error code.
>>
>> Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
>> ---
>>  drivers/nvme/target/tcp.c | 22 +++++-----------------
>>  1 file changed, 5 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>> index 1fbf12df1183..b250b13854b4 100644
>> --- a/drivers/nvme/target/tcp.c
>> +++ b/drivers/nvme/target/tcp.c
>> @@ -885,7 +885,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
>>  	if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
>>  		pr_err("bad nvme-tcp pdu length (%d)\n",
>>  			le32_to_cpu(icreq->hdr.plen));
>> -		nvmet_tcp_fatal_error(queue);
>>  		return -EPROTO;
>>  	}
>>
>> @@ -951,7 +950,6 @@ static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
>>  	ret = nvmet_tcp_map_data(cmd);
>>  	if (unlikely(ret)) {
>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>> -		nvmet_tcp_fatal_error(queue);
>>  		return -EPROTO;
>>  	}
>>
>> @@ -1024,7 +1022,6 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
>>
>>  err_proto:
>>  	/* FIXME: use proper transport errors */
>> -	nvmet_tcp_fatal_error(queue);
>>  	return -EPROTO;
>>  }
>>
>> @@ -1039,7 +1036,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>  		if (hdr->type != nvme_tcp_icreq) {
>>  			pr_err("unexpected pdu type (%d) before icreq\n",
>>  				hdr->type);
>> -			nvmet_tcp_fatal_error(queue);
>>  			return -EPROTO;
>>  		}
>>  		return nvmet_tcp_handle_icreq(queue);
>> @@ -1048,7 +1044,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>  	if (unlikely(hdr->type == nvme_tcp_icreq)) {
>>  		pr_err("queue %d: received icreq pdu in state %d\n",
>>  			queue->idx, queue->state);
>> -		nvmet_tcp_fatal_error(queue);
>>  		return -EPROTO;
>>  	}
>>
>> @@ -1065,7 +1060,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>  		pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
>>  			queue->idx, queue->nr_cmds, queue->send_list_len,
>>  			nvme_cmd->common.opcode);
>> -		nvmet_tcp_fatal_error(queue);
>>  		return -ENOMEM;
>>  	}
>>
>> @@ -1086,9 +1080,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>  	if (unlikely(ret)) {
>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>>  		if (nvmet_tcp_has_inline_data(queue->cmd))
>> -			nvmet_tcp_fatal_error(queue);
>> -		else
>> -			nvmet_req_complete(req, ret);
>> +			return -EPROTO;
>> +
>> +		nvmet_req_complete(req, ret);
>>  		ret = -EAGAIN;
>>  		goto out;
>>  	}
>> @@ -1211,7 +1205,6 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
>>
>>  	if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
>>  		pr_err("unexpected pdu type %d\n", hdr->type);
>> -		nvmet_tcp_fatal_error(queue);
>>  		return -EIO;
>>  	}
>>
>> @@ -1225,16 +1218,12 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
>>  	}
>>
>>  	if (queue->hdr_digest &&
>> -	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
>> -		nvmet_tcp_fatal_error(queue); /* fatal */
>> +	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen))
>>  		return -EPROTO;
>> -	}
>>
>>  	if (queue->data_digest &&
>> -	    nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
>> -		nvmet_tcp_fatal_error(queue); /* fatal */
>> +	    nvmet_tcp_check_ddgst(queue, &queue->pdu))
>>  		return -EPROTO;
>> -	}
>>
>>  	return nvmet_tcp_done_recv_pdu(queue);
>>  }
>> @@ -1319,7 +1308,6 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
>>  			le32_to_cpu(cmd->exp_ddgst));
>>  		nvmet_req_uninit(&cmd->req);
>>  		nvmet_tcp_free_cmd_buffers(cmd);
>> -		nvmet_tcp_fatal_error(queue);
>>  		ret = -EPROTO;
>>  		goto out;
>>  	}
>
> I seem to be missing something crucial here. But after applying this
> patch _all_ invocations to nvmet_tcp_fatal_error() are gone.
> (Applying the patches on top of linus tree with topmost commit
> 0257f64bdac7fdca30fa3cae0df8b9ecbec7733a)
>
> I must be missing something crucial here ...

I've just rebased my branch on top of master and I can still
see nvmet_tcp_socket_error() calling nvmet_tcp_fatal_error()

nvmet_tcp_socket_error() is called by both nvmet_tcp_try_send()
and nvmet_tcp_try_recv() in case of errors, so it's still here
with us.

Maurizio

^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error()
  2026-03-13  7:51 ` Maurizio Lombardi
@ 2026-03-13  8:11 ` Hannes Reinecke
  2026-03-13  8:14   ` Maurizio Lombardi
  0 siblings, 1 reply; 10+ messages in thread
From: Hannes Reinecke @ 2026-03-13 8:11 UTC (permalink / raw)
  To: Maurizio Lombardi, Maurizio Lombardi, kbusch
  Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak

On 3/13/26 08:51, Maurizio Lombardi wrote:
> On Fri Mar 13, 2026 at 8:20 AM CET, Hannes Reinecke wrote:
>> On 3/12/26 14:40, Maurizio Lombardi wrote:
>>> Executing nvmet_tcp_fatal_error() is generally the responsibility
>>> of the caller (nvmet_tcp_try_recv); all other functions should
>>> just return the error code.
>>>
>>> Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
>>> ---
>>>  drivers/nvme/target/tcp.c | 22 +++++-----------------
>>>  1 file changed, 5 insertions(+), 17 deletions(-)
>>>
>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>> index 1fbf12df1183..b250b13854b4 100644
>>> --- a/drivers/nvme/target/tcp.c
>>> +++ b/drivers/nvme/target/tcp.c
>>> @@ -885,7 +885,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
>>>  	if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
>>>  		pr_err("bad nvme-tcp pdu length (%d)\n",
>>>  			le32_to_cpu(icreq->hdr.plen));
>>> -		nvmet_tcp_fatal_error(queue);
>>>  		return -EPROTO;
>>>  	}
>>>
>>> @@ -951,7 +950,6 @@ static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
>>>  	ret = nvmet_tcp_map_data(cmd);
>>>  	if (unlikely(ret)) {
>>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>>> -		nvmet_tcp_fatal_error(queue);
>>>  		return -EPROTO;
>>>  	}
>>>
>>> @@ -1024,7 +1022,6 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
>>>
>>>  err_proto:
>>>  	/* FIXME: use proper transport errors */
>>> -	nvmet_tcp_fatal_error(queue);
>>>  	return -EPROTO;
>>>  }
>>>
>>> @@ -1039,7 +1036,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>  		if (hdr->type != nvme_tcp_icreq) {
>>>  			pr_err("unexpected pdu type (%d) before icreq\n",
>>>  				hdr->type);
>>> -			nvmet_tcp_fatal_error(queue);
>>>  			return -EPROTO;
>>>  		}
>>>  		return nvmet_tcp_handle_icreq(queue);
>>> @@ -1048,7 +1044,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>  	if (unlikely(hdr->type == nvme_tcp_icreq)) {
>>>  		pr_err("queue %d: received icreq pdu in state %d\n",
>>>  			queue->idx, queue->state);
>>> -		nvmet_tcp_fatal_error(queue);
>>>  		return -EPROTO;
>>>  	}
>>>
>>> @@ -1065,7 +1060,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>  		pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
>>>  			queue->idx, queue->nr_cmds, queue->send_list_len,
>>>  			nvme_cmd->common.opcode);
>>> -		nvmet_tcp_fatal_error(queue);
>>>  		return -ENOMEM;
>>>  	}
>>>
>>> @@ -1086,9 +1080,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>  	if (unlikely(ret)) {
>>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>>>  		if (nvmet_tcp_has_inline_data(queue->cmd))
>>> -			nvmet_tcp_fatal_error(queue);
>>> -		else
>>> -			nvmet_req_complete(req, ret);
>>> +			return -EPROTO;
>>> +
>>> +		nvmet_req_complete(req, ret);
>>>  		ret = -EAGAIN;
>>>  		goto out;
>>>  	}
>>> @@ -1211,7 +1205,6 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
>>>
>>>  	if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
>>>  		pr_err("unexpected pdu type %d\n", hdr->type);
>>> -		nvmet_tcp_fatal_error(queue);
>>>  		return -EIO;
>>>  	}
>>>
>>> @@ -1225,16 +1218,12 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
>>>  	}
>>>
>>>  	if (queue->hdr_digest &&
>>> -	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
>>> -		nvmet_tcp_fatal_error(queue); /* fatal */
>>> +	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen))
>>>  		return -EPROTO;
>>> -	}
>>>
>>>  	if (queue->data_digest &&
>>> -	    nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
>>> -		nvmet_tcp_fatal_error(queue); /* fatal */
>>> +	    nvmet_tcp_check_ddgst(queue, &queue->pdu))
>>>  		return -EPROTO;
>>> -	}
>>>
>>>  	return nvmet_tcp_done_recv_pdu(queue);
>>>  }
>>> @@ -1319,7 +1308,6 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
>>>  			le32_to_cpu(cmd->exp_ddgst));
>>>  		nvmet_req_uninit(&cmd->req);
>>>  		nvmet_tcp_free_cmd_buffers(cmd);
>>> -		nvmet_tcp_fatal_error(queue);
>>>  		ret = -EPROTO;
>>>  		goto out;
>>>  	}
>>
>> I seem to be missing something crucial here. But after applying this
>> patch _all_ invocations to nvmet_tcp_fatal_error() are gone.
>> (Applying the patches on top of linus tree with topmost commit
>> 0257f64bdac7fdca30fa3cae0df8b9ecbec7733a)
>>
>> I must be missing something crucial here ...
>
> I've just rebased my branch on top of master and I can still
> see nvmet_tcp_socket_error() calling nvmet_tcp_fatal_error()
>
> nvmet_tcp_socket_error() is called by both nvmet_tcp_try_send()
> and nvmet_tcp_try_recv() in case of errors, so it's still here
> with us.
>
Right.
But now nvmet_tcp_fatal_error() has just a single caller, so please
fold it into nvmet_tcp_socket_error() (and remove the forward declaration).

Cheers,

Hannes
--
Dr. Hannes Reinecke                       Kernel Storage Architect
hare@suse.de                                     +49 911 74053 688
SUSE Software Solutions GmbH, Frankenstr. 146, 90461 Nürnberg
HRB 36809 (AG Nürnberg), GF: I. Totev, A. McDonald, W. Knoblich

^ permalink raw reply	[flat|nested] 10+ messages in thread
* Re: [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error()
  2026-03-13  8:11 ` Hannes Reinecke
@ 2026-03-13  8:14 ` Maurizio Lombardi
  0 siblings, 0 replies; 10+ messages in thread
From: Maurizio Lombardi @ 2026-03-13 8:14 UTC (permalink / raw)
  To: Hannes Reinecke, Maurizio Lombardi, Maurizio Lombardi, kbusch
  Cc: linux-nvme, dwagner, yjshin0438, sagi, chaitanyak

On Fri Mar 13, 2026 at 9:11 AM CET, Hannes Reinecke wrote:
> On 3/13/26 08:51, Maurizio Lombardi wrote:
>> On Fri Mar 13, 2026 at 8:20 AM CET, Hannes Reinecke wrote:
>>> On 3/12/26 14:40, Maurizio Lombardi wrote:
>>>> Executing nvmet_tcp_fatal_error() is generally the responsibility
>>>> of the caller (nvmet_tcp_try_recv); all other functions should
>>>> just return the error code.
>>>>
>>>> Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
>>>> ---
>>>>  drivers/nvme/target/tcp.c | 22 +++++-----------------
>>>>  1 file changed, 5 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
>>>> index 1fbf12df1183..b250b13854b4 100644
>>>> --- a/drivers/nvme/target/tcp.c
>>>> +++ b/drivers/nvme/target/tcp.c
>>>> @@ -885,7 +885,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
>>>>  	if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
>>>>  		pr_err("bad nvme-tcp pdu length (%d)\n",
>>>>  			le32_to_cpu(icreq->hdr.plen));
>>>> -		nvmet_tcp_fatal_error(queue);
>>>>  		return -EPROTO;
>>>>  	}
>>>>
>>>> @@ -951,7 +950,6 @@ static int nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
>>>>  	ret = nvmet_tcp_map_data(cmd);
>>>>  	if (unlikely(ret)) {
>>>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>>>> -		nvmet_tcp_fatal_error(queue);
>>>>  		return -EPROTO;
>>>>  	}
>>>>
>>>> @@ -1024,7 +1022,6 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
>>>>
>>>>  err_proto:
>>>>  	/* FIXME: use proper transport errors */
>>>> -	nvmet_tcp_fatal_error(queue);
>>>>  	return -EPROTO;
>>>>  }
>>>>
>>>> @@ -1039,7 +1036,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>>  		if (hdr->type != nvme_tcp_icreq) {
>>>>  			pr_err("unexpected pdu type (%d) before icreq\n",
>>>>  				hdr->type);
>>>> -			nvmet_tcp_fatal_error(queue);
>>>>  			return -EPROTO;
>>>>  		}
>>>>  		return nvmet_tcp_handle_icreq(queue);
>>>> @@ -1048,7 +1044,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>>  	if (unlikely(hdr->type == nvme_tcp_icreq)) {
>>>>  		pr_err("queue %d: received icreq pdu in state %d\n",
>>>>  			queue->idx, queue->state);
>>>> -		nvmet_tcp_fatal_error(queue);
>>>>  		return -EPROTO;
>>>>  	}
>>>>
>>>> @@ -1065,7 +1060,6 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>>  		pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
>>>>  			queue->idx, queue->nr_cmds, queue->send_list_len,
>>>>  			nvme_cmd->common.opcode);
>>>> -		nvmet_tcp_fatal_error(queue);
>>>>  		return -ENOMEM;
>>>>  	}
>>>>
>>>> @@ -1086,9 +1080,9 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
>>>>  	if (unlikely(ret)) {
>>>>  		pr_err("queue %d: failed to map data\n", queue->idx);
>>>>  		if (nvmet_tcp_has_inline_data(queue->cmd))
>>>> -			nvmet_tcp_fatal_error(queue);
>>>> -		else
>>>> -			nvmet_req_complete(req, ret);
>>>> +			return -EPROTO;
>>>> +
>>>> +		nvmet_req_complete(req, ret);
>>>>  		ret = -EAGAIN;
>>>>  		goto out;
>>>>  	}
>>>> @@ -1211,7 +1205,6 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
>>>>
>>>>  	if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
>>>>  		pr_err("unexpected pdu type %d\n", hdr->type);
>>>> -		nvmet_tcp_fatal_error(queue);
>>>>  		return -EIO;
>>>>  	}
>>>>
>>>> @@ -1225,16 +1218,12 @@ static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
>>>>  	}
>>>>
>>>>  	if (queue->hdr_digest &&
>>>> -	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
>>>> -		nvmet_tcp_fatal_error(queue); /* fatal */
>>>> +	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen))
>>>>  		return -EPROTO;
>>>> -	}
>>>>
>>>>  	if (queue->data_digest &&
>>>> -	    nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
>>>> -		nvmet_tcp_fatal_error(queue); /* fatal */
>>>> +	    nvmet_tcp_check_ddgst(queue, &queue->pdu))
>>>>  		return -EPROTO;
>>>> -	}
>>>>
>>>>  	return nvmet_tcp_done_recv_pdu(queue);
>>>>  }
>>>> @@ -1319,7 +1308,6 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
>>>>  			le32_to_cpu(cmd->exp_ddgst));
>>>>  		nvmet_req_uninit(&cmd->req);
>>>>  		nvmet_tcp_free_cmd_buffers(cmd);
>>>> -		nvmet_tcp_fatal_error(queue);
>>>>  		ret = -EPROTO;
>>>>  		goto out;
>>>>  	}
>>>
>>> I seem to be missing something crucial here. But after applying this
>>> patch _all_ invocations to nvmet_tcp_fatal_error() are gone.
>>> (Applying the patches on top of linus tree with topmost commit
>>> 0257f64bdac7fdca30fa3cae0df8b9ecbec7733a)
>>>
>>> I must be missing something crucial here ...
>>
>> I've just rebased my branch on top of master and I can still
>> see nvmet_tcp_socket_error() calling nvmet_tcp_fatal_error()
>>
>> nvmet_tcp_socket_error() is called by both nvmet_tcp_try_send()
>> and nvmet_tcp_try_recv() in case of errors, so it's still here
>> with us.
>>
> Right.
> But now nvmet_tcp_fatal_error() has just a single caller, so please
> fold it into nvmet_tcp_socket_error() (and remove the forward declaration).

Ok, will send a V3.
Thanks for the review.

Maurizio

^ permalink raw reply	[flat|nested] 10+ messages in thread
end of thread, other threads:[~2026-03-13  8:14 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed
-- links below jump to the message on this page --
2026-03-12 13:40 [PATCH V2 0/2] nvmet-tcp: fix receive path error handling and state machine Maurizio Lombardi
2026-03-12 13:40 ` [PATCH V2 1/2] nvmet-tcp: propagate nvmet_tcp_build_pdu_iovec() errors to its callers Maurizio Lombardi
2026-03-13  7:10   ` Hannes Reinecke
2026-03-13  7:45     ` Maurizio Lombardi
2026-03-13  8:11       ` Hannes Reinecke
2026-03-12 13:40 ` [PATCH V2 2/2] nvmet-tcp: remove redundant calls to nvmet_tcp_fatal_error() Maurizio Lombardi
2026-03-13  7:20   ` Hannes Reinecke
2026-03-13  7:51     ` Maurizio Lombardi
2026-03-13  8:11       ` Hannes Reinecke
2026-03-13  8:14         ` Maurizio Lombardi