* [PATCH V2 1/3] nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length
2023-12-22 15:17 [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs Maurizio Lombardi
@ 2023-12-22 15:17 ` Maurizio Lombardi
2023-12-25 10:19 ` Sagi Grimberg
2023-12-22 15:17 ` [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete() Maurizio Lombardi
` (2 subsequent siblings)
3 siblings, 1 reply; 11+ messages in thread
From: Maurizio Lombardi @ 2023-12-22 15:17 UTC (permalink / raw)
To: sagi; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
If the host sends an H2CData command with an invalid DATAL,
the kernel may crash in nvmet_tcp_build_pdu_iovec().
Unable to handle kernel NULL pointer dereference at
virtual address 0000000000000000
lr : nvmet_tcp_io_work+0x6ac/0x718 [nvmet_tcp]
Call trace:
process_one_work+0x174/0x3c8
worker_thread+0x2d0/0x3e8
kthread+0x104/0x110
Fix the bug by raising a fatal error if DATAL isn't coherent
with the packet size.
Also, the PDU length should never exceed the MAXH2CDATA parameter which
has been communicated to the host in nvmet_tcp_handle_icreq().
Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver")
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
---
drivers/nvme/target/tcp.c | 13 ++++++++++++-
1 file changed, 12 insertions(+), 1 deletion(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 4cc27856aa8f..ad16795934b8 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -24,6 +24,7 @@
#include "nvmet.h"
#define NVMET_TCP_DEF_INLINE_DATA_SIZE (4 * PAGE_SIZE)
+#define NVMET_TCP_MAXH2CDATA 0x400000 /* 16M arbitrary limit */
static int param_store_val(const char *str, int *val, int min, int max)
{
@@ -923,7 +924,7 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
icresp->hdr.pdo = 0;
icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
- icresp->maxdata = cpu_to_le32(0x400000); /* 16M arbitrary limit */
+ icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA);
icresp->cpda = 0;
if (queue->hdr_digest)
icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
@@ -978,6 +979,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
{
struct nvme_tcp_data_pdu *data = &queue->pdu.data;
struct nvmet_tcp_cmd *cmd;
+ unsigned int plen;
if (likely(queue->nr_cmds)) {
if (unlikely(data->ttag >= queue->nr_cmds)) {
@@ -1001,7 +1003,16 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
return -EPROTO;
}
+ plen = le32_to_cpu(data->hdr.plen);
cmd->pdu_len = le32_to_cpu(data->data_length);
+ if (unlikely(cmd->pdu_len != (plen - sizeof(*data)) ||
+ cmd->pdu_len == 0 ||
+ cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) {
+ pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len);
+ /* FIXME: use proper transport errors */
+ nvmet_tcp_fatal_error(queue);
+ return -EPROTO;
+ }
cmd->pdu_recv = 0;
nvmet_tcp_build_pdu_iovec(cmd);
queue->cmd = cmd;
--
2.39.3
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete()
2023-12-22 15:17 [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs Maurizio Lombardi
2023-12-22 15:17 ` [PATCH V2 1/3] nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length Maurizio Lombardi
@ 2023-12-22 15:17 ` Maurizio Lombardi
2023-12-25 10:28 ` Sagi Grimberg
2024-01-01 9:44 ` Sagi Grimberg
2023-12-22 15:17 ` [PATCH V2 3/3] nvmet-tcp: remove boilerplate code Maurizio Lombardi
2024-01-02 21:07 ` [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs Keith Busch
3 siblings, 2 replies; 11+ messages in thread
From: Maurizio Lombardi @ 2023-12-22 15:17 UTC (permalink / raw)
To: sagi; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
in nvmet_tcp_handle_h2c_data_pdu(), if the host sends a data_offset
different from rbytes_done, the driver ends up calling nvmet_req_complete()
passing a status error.
The problem is that at this point cmd->req is not yet initialized,
the kernel will crash after dereferencing a NULL pointer.
Fix the bug by replacing the call to nvmet_req_complete() with
nvmet_tcp_fatal_error().
Fixes: 872d26a391da ("nvmet-tcp: add NVMe over TCP target driver")
Reviewed-by: Keith Busch <kbsuch@kernel.org>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
---
drivers/nvme/target/tcp.c | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index ad16795934b8..b4b6a8ac8089 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -998,8 +998,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
data->ttag, le32_to_cpu(data->data_offset),
cmd->rbytes_done);
/* FIXME: use path and transport errors */
- nvmet_req_complete(&cmd->req,
- NVME_SC_INVALID_FIELD | NVME_SC_DNR);
+ nvmet_tcp_fatal_error(queue);
return -EPROTO;
}
--
2.39.3
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete()
2023-12-22 15:17 ` [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete() Maurizio Lombardi
@ 2023-12-25 10:28 ` Sagi Grimberg
2023-12-29 8:51 ` Maurizio Lombardi
2024-01-01 9:44 ` Sagi Grimberg
1 sibling, 1 reply; 11+ messages in thread
From: Sagi Grimberg @ 2023-12-25 10:28 UTC (permalink / raw)
To: Maurizio Lombardi; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
> in nvmet_tcp_handle_h2c_data_pdu(), if the host sends a data_offset
> different from rbytes_done, the driver ends up calling nvmet_req_complete()
> passing a status error.
> The problem is that at this point cmd->req is not yet initialized,
> the kernel will crash after dereferencing a NULL pointer.
>
> Fix the bug by replacing the call to nvmet_req_complete() with
> nvmet_tcp_fatal_error().
This is indeed a bug. However nvmet attempts to gracefully fail
a particular nvme command when there is a recoverable error (see
nvmet_tcp_handle_req_failure).
In the case where the length is arbitrarily long, then it really
doesn't make sense for nvmet to just accept it and throw it away,
but if this is an offset error, maybe it is...
I'm not hard set on this, but it would be beneficial to have
allow some graceful cmd failure here... Perhaps you can share
more about what was causing this error?
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete()
2023-12-25 10:28 ` Sagi Grimberg
@ 2023-12-29 8:51 ` Maurizio Lombardi
2024-01-01 9:44 ` Sagi Grimberg
0 siblings, 1 reply; 11+ messages in thread
From: Maurizio Lombardi @ 2023-12-29 8:51 UTC (permalink / raw)
To: Sagi Grimberg; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
po 25. 12. 2023 v 11:28 odesílatel Sagi Grimberg <sagi@grimberg.me> napsal:
>
>
> > in nvmet_tcp_handle_h2c_data_pdu(), if the host sends a data_offset
> > different from rbytes_done, the driver ends up calling nvmet_req_complete()
> > passing a status error.
> > The problem is that at this point cmd->req is not yet initialized,
> > the kernel will crash after dereferencing a NULL pointer.
> >
> > Fix the bug by replacing the call to nvmet_req_complete() with
> > nvmet_tcp_fatal_error().
>
> This is indeed a bug. However nvmet attempts to gracefully fail
> a particular nvme command when there is a recoverable error (see
> nvmet_tcp_handle_req_failure).
>
> In the case where the length is arbitrarily long, then it really
> doesn't make sense for nvmet to just accept it and throw it away,
> but if this is an offset error, maybe it is...
>
> I'm not hard set on this, but it would be beneficial to have
> allow some graceful cmd failure here...
But doesn't the nvme over fabric specification explicitly say that
those offset and size errors in H2C PDUs should
be treated as fatal transport errors (See
NVMe-over-Fabrics-1.1a-2021.07.12, page 69) ?
What is really missing here is setting the Fatal Error Status field in
the C2HTermReq PDU, that's
why I left the FIXME tag in the comment.
> Perhaps you can share
> more about what was causing this error?
>
Nothing actually, but someone could write a simple program that
sends invalid packets (syzkaller, for example) and easily make the
target's kernel crash.
Maurizio
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete()
2023-12-29 8:51 ` Maurizio Lombardi
@ 2024-01-01 9:44 ` Sagi Grimberg
0 siblings, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2024-01-01 9:44 UTC (permalink / raw)
To: Maurizio Lombardi; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
>> This is indeed a bug. However nvmet attempts to gracefully fail
>> a particular nvme command when there is a recoverable error (see
>> nvmet_tcp_handle_req_failure).
>>
>> In the case where the length is arbitrarily long, then it really
>> doesn't make sense for nvmet to just accept it and throw it away,
>> but if this is an offset error, maybe it is...
>>
>> I'm not hard set on this, but it would be beneficial to have
>> allow some graceful cmd failure here...
>
> But doesn't the nvme over fabric specification explicitly say that
> those offset and size errors in H2C PDUs should
> be treated as fatal transport errors (See
> NVMe-over-Fabrics-1.1a-2021.07.12, page 69) ?
> What is really missing here is setting the Fatal Error Status field in
> the C2HTermReq PDU, that's
> why I left the FIXME tag in the comment.
You're correct, the spec does say that. Sometimes I forget...
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete()
2023-12-22 15:17 ` [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete() Maurizio Lombardi
2023-12-25 10:28 ` Sagi Grimberg
@ 2024-01-01 9:44 ` Sagi Grimberg
1 sibling, 0 replies; 11+ messages in thread
From: Sagi Grimberg @ 2024-01-01 9:44 UTC (permalink / raw)
To: Maurizio Lombardi; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH V2 3/3] nvmet-tcp: remove boilerplate code
2023-12-22 15:17 [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs Maurizio Lombardi
2023-12-22 15:17 ` [PATCH V2 1/3] nvmet-tcp: Fix a kernel panic when host sends an invalid H2C PDU length Maurizio Lombardi
2023-12-22 15:17 ` [PATCH V2 2/3] nvmet-tcp: fix a crash in nvmet_req_complete() Maurizio Lombardi
@ 2023-12-22 15:17 ` Maurizio Lombardi
2023-12-25 10:28 ` Sagi Grimberg
2024-01-02 21:07 ` [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs Keith Busch
3 siblings, 1 reply; 11+ messages in thread
From: Maurizio Lombardi @ 2023-12-22 15:17 UTC (permalink / raw)
To: sagi; +Cc: zahavi.alon, hch, chaitanyak, linux-nvme, kbusch
Simplify the nvmet_tcp_handle_h2c_data_pdu() function by removing
boilerplate code.
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
---
drivers/nvme/target/tcp.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index b4b6a8ac8089..3569c1255c5e 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -985,8 +985,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
if (unlikely(data->ttag >= queue->nr_cmds)) {
pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n",
queue->idx, data->ttag, queue->nr_cmds);
- nvmet_tcp_fatal_error(queue);
- return -EPROTO;
+ goto err_proto;
}
cmd = &queue->cmds[data->ttag];
} else {
@@ -997,9 +996,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
pr_err("ttag %u unexpected data offset %u (expected %u)\n",
data->ttag, le32_to_cpu(data->data_offset),
cmd->rbytes_done);
- /* FIXME: use path and transport errors */
- nvmet_tcp_fatal_error(queue);
- return -EPROTO;
+ goto err_proto;
}
plen = le32_to_cpu(data->hdr.plen);
@@ -1008,9 +1005,7 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
cmd->pdu_len == 0 ||
cmd->pdu_len > NVMET_TCP_MAXH2CDATA)) {
pr_err("H2CData PDU len %u is invalid\n", cmd->pdu_len);
- /* FIXME: use proper transport errors */
- nvmet_tcp_fatal_error(queue);
- return -EPROTO;
+ goto err_proto;
}
cmd->pdu_recv = 0;
nvmet_tcp_build_pdu_iovec(cmd);
@@ -1018,6 +1013,11 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
queue->rcv_state = NVMET_TCP_RECV_DATA;
return 0;
+
+err_proto:
+ /* FIXME: use proper transport errors */
+ nvmet_tcp_fatal_error(queue);
+ return -EPROTO;
}
static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
--
2.39.3
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs
2023-12-22 15:17 [PATCH V2 0/3] Fix some kernel panics due to the host sending invalid PDUs Maurizio Lombardi
` (2 preceding siblings ...)
2023-12-22 15:17 ` [PATCH V2 3/3] nvmet-tcp: remove boilerplate code Maurizio Lombardi
@ 2024-01-02 21:07 ` Keith Busch
3 siblings, 0 replies; 11+ messages in thread
From: Keith Busch @ 2024-01-02 21:07 UTC (permalink / raw)
To: Maurizio Lombardi; +Cc: sagi, zahavi.alon, hch, chaitanyak, linux-nvme
Thanks, series applied to nvme-6.8.
^ permalink raw reply [flat|nested] 11+ messages in thread