* [PATCH 1/6] svcrdma: validate Read chunk positions before reconstruction
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
@ 2026-05-26 13:35 ` Chuck Lever
2026-05-26 13:35 ` [PATCH 2/6] svcrdma: Fix offset arithmetic in read_chunk_range Chuck Lever
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2026-05-26 13:35 UTC (permalink / raw)
To: Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
The RPC/RDMA Read chunk position field is supplied by the remote
client and stored verbatim in the parsed chunk list.
xdr_count_read_segments() checks only 4-byte alignment; it never
compares the position against the received inline body length.
In the single-chunk path, svc_rdma_read_complete_one() splits the
head and tail kvecs at ch_position. A position past the inline
body underflows the tail length, exposing adjacent slab memory to
the upper XDR decoder.
In the multi-chunk path, svc_rdma_read_multiple_chunks() computes
gap lengths between chunks as unsigned subtractions from
ch_position. Overlapping Read chunks cause these subtractions to
underflow. A final position past the inline body likewise
underflows the trailing gap length. svc_rdma_copy_inline_range()
then copies past the receive buffer into request pages that are
returned to the client through the Reply channel.
Bound inline-range copies in svc_rdma_copy_inline_range() against
the decoded inline RPC body saved in rc_saved_arg. Reject a
single Read chunk positioned beyond that body, and reject
multi-chunk lists where accumulated read bytes exceed the next
chunk's position. Apply the same position and overlap checks in
the call-chunk interleaving path.
Fixes: d96962e6d0e2 ("svcrdma: Use the new parsed chunk list when pulling Read chunks")
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/svc_rdma_rw.c | 38 ++++++++++++++++++++++++++++++--------
1 file changed, 30 insertions(+), 8 deletions(-)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index c535e6de9654..eb4bc56ed387 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1065,7 +1065,7 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
* svc_rdma_copy_inline_range - Copy part of the inline content into pages
* @rqstp: RPC transaction context
* @head: context for ongoing I/O
- * @offset: offset into the Receive buffer of region to copy
+ * @offset: offset into the inline content of region to copy
* @remaining: length of region to copy
*
* Take a page at a time from rqstp->rq_pages and copy the inline
@@ -1082,9 +1082,13 @@ static int svc_rdma_copy_inline_range(struct svc_rqst *rqstp,
unsigned int offset,
unsigned int remaining)
{
- unsigned char *dst, *src = head->rc_recv_buf;
+ unsigned char *dst, *src = head->rc_saved_arg.head[0].iov_base;
+ unsigned int inline_len = head->rc_saved_arg.head[0].iov_len;
unsigned int page_no, numpages;
+ if (offset > inline_len || remaining > inline_len - offset)
+ return -EINVAL;
+
numpages = PAGE_ALIGN(head->rc_pageoff + remaining) >> PAGE_SHIFT;
for (page_no = 0; page_no < numpages; page_no++) {
unsigned int page_len;
@@ -1135,9 +1139,10 @@ svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
{
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
struct svc_rdma_chunk *chunk, *next;
- unsigned int start, length;
+ unsigned int inline_len, start, length;
int ret;
+ inline_len = head->rc_saved_arg.head[0].iov_len;
start = 0;
chunk = pcl_first_chunk(pcl);
length = chunk->ch_position;
@@ -1155,6 +1160,8 @@ svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
break;
start += length;
+ if (head->rc_readbytes > next->ch_position)
+ return -EINVAL;
length = next->ch_position - head->rc_readbytes;
ret = svc_rdma_copy_inline_range(rqstp, head, start, length);
if (ret < 0)
@@ -1162,7 +1169,9 @@ svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
}
start += length;
- length = head->rc_byte_len - start;
+ if (start > inline_len)
+ return -EINVAL;
+ length = inline_len - start;
return svc_rdma_copy_inline_range(rqstp, head, start, length);
}
@@ -1187,8 +1196,12 @@ svc_rdma_read_multiple_chunks(struct svc_rqst *rqstp,
static int svc_rdma_read_data_item(struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head)
{
- return svc_rdma_build_read_chunk(rqstp, head,
- pcl_first_chunk(&head->rc_read_pcl));
+ struct svc_rdma_chunk *chunk = pcl_first_chunk(&head->rc_read_pcl);
+
+ if (chunk->ch_position > head->rc_saved_arg.head[0].iov_len)
+ return -EINVAL;
+
+ return svc_rdma_build_read_chunk(rqstp, head, chunk);
}
/**
@@ -1257,14 +1270,17 @@ static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp,
pcl_first_chunk(&head->rc_call_pcl);
const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
struct svc_rdma_chunk *chunk, *next;
- unsigned int start, length;
+ unsigned int call_len, start, length;
int ret;
if (pcl_is_empty(pcl))
return svc_rdma_build_read_chunk(rqstp, head, call_chunk);
+ call_len = call_chunk->ch_length;
start = 0;
chunk = pcl_first_chunk(pcl);
+ if (chunk->ch_position > call_len)
+ return -EINVAL;
length = chunk->ch_position;
ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
start, length);
@@ -1281,6 +1297,10 @@ static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp,
break;
start += length;
+ if (next->ch_position > call_len)
+ return -EINVAL;
+ if (head->rc_readbytes > next->ch_position)
+ return -EINVAL;
length = next->ch_position - head->rc_readbytes;
ret = svc_rdma_read_chunk_range(rqstp, head, call_chunk,
start, length);
@@ -1289,7 +1309,9 @@ static int svc_rdma_read_call_chunk(struct svc_rqst *rqstp,
}
start += length;
- length = call_chunk->ch_length - start;
+ if (start > call_len)
+ return -EINVAL;
+ length = call_len - start;
return svc_rdma_read_chunk_range(rqstp, head, call_chunk,
start, length);
}
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 2/6] svcrdma: Fix offset arithmetic in read_chunk_range
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
2026-05-26 13:35 ` [PATCH 1/6] svcrdma: validate Read chunk positions before reconstruction Chuck Lever
@ 2026-05-26 13:35 ` Chuck Lever
2026-05-26 13:35 ` [PATCH 3/6] svcrdma: reject oversized Read segments at decode time Chuck Lever
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2026-05-26 13:35 UTC (permalink / raw)
To: Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chris Mason, Chuck Lever
From: Chris Mason <clm@meta.com>
svc_rdma_read_chunk_range() walks a Read chunk's segment list to
build a sub-range starting at byte offset and spanning length bytes
for a Position-Zero or Call chunk. Two arithmetic defects in the
per-segment loop produce wrong DMA lengths and a u32 underflow:
pcl_for_each_segment(segment, chunk) {
if (offset > segment->rs_length) {
offset -= segment->rs_length;
continue;
}
dummy.rs_handle = segment->rs_handle;
dummy.rs_length = min_t(u32, length,
segment->rs_length) - offset;
dummy.rs_offset = segment->rs_offset + offset;
First, the skip predicate uses '>' instead of '>='. When offset
equals the segment's full rs_length, the segment is fully consumed
and should be skipped, but the loop falls through into the body.
The resulting dummy.rs_length is min_t(u32, length, rs_length) -
rs_length, which underflows to a near-UINT_MAX u32 when length is
smaller than rs_length, or is zero otherwise.
Second, the length formula subtracts offset from the min_t() result
rather than from segment->rs_length before the cap. For offset > 0
the segment's residual is rs_length - offset, not rs_length, so the
cap must be applied to the residual. With the current bracketing,
whenever length is smaller than rs_length - offset the per-segment
length becomes length - offset instead of length, silently dropping
offset bytes from the rebuilt chunk. Combined with the boundary
case above it also enables the u32 underflow path, which propagates
a huge nr_bvec into svc_rdma_build_read_segment() and a multi-MiB
kmalloc_array_node() in svc_rdma_get_rw_ctxt().
Additionally, svc_rdma_read_call_chunk() can invoke this function
with length == 0 when the last Read chunk ends exactly at the end
of the Call chunk. With the corrected >= predicate, every segment
is skipped and the function returns the initial -EINVAL, rejecting
a valid request. Return success immediately when length is zero.
Also break out of the loop once length is fully consumed to avoid
passing zero-length segments to svc_rdma_build_read_segment().
Fix by using '>=' so a fully-consumed segment is skipped, by
moving '- offset' inside min_t() so the cap is applied to the
segment's residual length, by returning success for zero-length
requests, and by stopping iteration when the requested range has
been consumed.
Fixes: d7cc73972661 ("svcrdma: support multiple Read chunks per RPC")
Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@meta.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/svc_rdma_rw.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index eb4bc56ed387..182bd577e0b7 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1227,17 +1227,20 @@ static int svc_rdma_read_chunk_range(struct svc_rqst *rqstp,
const struct svc_rdma_segment *segment;
int ret;
+ if (!length)
+ return 0;
+
ret = -EINVAL;
pcl_for_each_segment(segment, chunk) {
struct svc_rdma_segment dummy;
- if (offset > segment->rs_length) {
+ if (offset >= segment->rs_length) {
offset -= segment->rs_length;
continue;
}
dummy.rs_handle = segment->rs_handle;
- dummy.rs_length = min_t(u32, length, segment->rs_length) - offset;
+ dummy.rs_length = min_t(u32, length, segment->rs_length - offset);
dummy.rs_offset = segment->rs_offset + offset;
ret = svc_rdma_build_read_segment(rqstp, head, &dummy);
@@ -1246,6 +1249,8 @@ static int svc_rdma_read_chunk_range(struct svc_rqst *rqstp,
head->rc_readbytes += dummy.rs_length;
length -= dummy.rs_length;
+ if (!length)
+ break;
offset = 0;
}
return ret;
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 3/6] svcrdma: reject oversized Read segments at decode time
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
2026-05-26 13:35 ` [PATCH 1/6] svcrdma: validate Read chunk positions before reconstruction Chuck Lever
2026-05-26 13:35 ` [PATCH 2/6] svcrdma: Fix offset arithmetic in read_chunk_range Chuck Lever
@ 2026-05-26 13:35 ` Chuck Lever
2026-05-26 13:35 ` [PATCH 4/6] svcrdma: fix pcl_for_each_segment for empty chunks Chuck Lever
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2026-05-26 13:35 UTC (permalink / raw)
To: Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chuck Lever, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
The RPC/RDMA Read list decoder stores wire-supplied segment
lengths without validation. xdr_count_read_segments() checks
4-byte alignment for non-zero position values but does not
cap the segment length.
An oversized rs_length reaches svc_rdma_build_read_segment(),
which derives nr_bvec from it and can drive a large dynamic
bvec allocation before verifying that enough rq_pages remain.
If the post-allocation page-overrun guard fires, the freshly
acquired rw context is not returned, leaking the resource.
Reject any segment whose length exceeds the receive context's
page budget during Read list decoding, consistent with how
xdr_check_write_chunk() bounds Write segment counts against
rc_maxpages. Also return the rw context on the existing
post-allocation overrun path in svc_rdma_build_read_segment(),
keeping that defensive guard balanced.
Fixes: 5ee62b4a9113 ("svcrdma: use bvec-based RDMA read/write API")
Signed-off-by: Chuck Lever <cel@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 2 ++
net/sunrpc/xprtrdma/svc_rdma_rw.c | 5 +++--
2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index fe9bf0371b6e..15c1d8ae5301 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -458,6 +458,8 @@ static bool xdr_count_read_segments(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
xdr_decode_read_segment(p, &position, &handle,
&length, &offset);
+ if (length > rctxt->rc_maxpages << PAGE_SHIFT)
+ return false;
if (position) {
if (position & 3)
return false;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 182bd577e0b7..587e4cd29303 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -1013,7 +1013,7 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
len -= seg_len;
if (len && ((head->rc_curpage + 1) > rqstp->rq_maxpages))
- goto out_overrun;
+ goto out_put;
}
ret = svc_rdma_rw_ctx_init(rdma, ctxt, segment->rs_offset,
@@ -1027,7 +1027,8 @@ static int svc_rdma_build_read_segment(struct svc_rqst *rqstp,
cc->cc_sqecount += ret;
return 0;
-out_overrun:
+out_put:
+ svc_rdma_put_rw_ctxt(rdma, ctxt);
trace_svcrdma_page_overrun_err(&cc->cc_cid, head->rc_curpage);
return -EINVAL;
}
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 4/6] svcrdma: fix pcl_for_each_segment for empty chunks
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
` (2 preceding siblings ...)
2026-05-26 13:35 ` [PATCH 3/6] svcrdma: reject oversized Read segments at decode time Chuck Lever
@ 2026-05-26 13:35 ` Chuck Lever
2026-05-26 13:35 ` [PATCH 5/6] svcrdma: reject Write/Reply chunks with segcount 0 Chuck Lever
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2026-05-26 13:35 UTC (permalink / raw)
To: Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chris Mason, Chuck Lever
From: Chris Mason <clm@meta.com>
When a parsed chunk list contains a chunk whose ch_segcount is zero,
pcl_for_each_segment computes its inclusive upper bound as
&chunk->ch_segments[ch_segcount - 1]. ch_segcount is u32, so the
subtraction wraps to 0xFFFFFFFF and the bound lands far past the
ch_segments flex array. The loop body then walks unrelated memory at
sizeof(struct svc_rdma_segment) stride until it faults.
A zero-segcount chunk is reachable from the wire:
xdr_check_write_chunk() only rejects segcount values greater than
rc_maxpages, and pcl_alloc_write() links a freshly allocated chunk
onto rc_write_pcl/rc_reply_pcl before its segment-fill loop runs,
so a Write or Reply chunk advertising zero segments leaves
ch_segcount == 0 on the list. When the transport has negotiated
Send-With-Invalidate, svc_rdma_get_inv_rkey() iterates all four
PCLs with pcl_for_each_segment and dereferences segment->rs_handle
on each iteration, turning the underflow into an out-of-bounds read
and a general protection fault.
xdr_check_write_list / xdr_check_reply_chunk
pcl_alloc_write()
chunk = pcl_alloc_chunk(...) /* ch_segcount = 0 */
list_add_tail(&chunk->ch_list, &pcl->cl_chunks)
/* fill loop iterates zero times for wire segcount 0 */
svc_rdma_get_inv_rkey()
pcl_for_each_chunk(rc_write_pcl)
pcl_for_each_segment(segment, chunk)
pos <= &ch_segments[0u - 1u] /* 0xFFFFFFFF */
segment->rs_handle /* OOB read -> GPF */
Fix by switching the macro to a half-open upper bound that uses
ch_segcount directly. For ch_segcount == 0 the loop start equals the
loop end and the body is skipped; for ch_segcount > 0 the iteration
range is unchanged. All six existing call sites in
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c and
net/sunrpc/xprtrdma/svc_rdma_rw.c remain correct under the new bound,
so no caller changes are needed.
Fixes: 78147ca8b4a9 ("svcrdma: Add a "parsed chunk list" data structure")
Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@meta.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
include/linux/sunrpc/svc_rdma_pcl.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h
index 7516ad0fae80..655681cf8fed 100644
--- a/include/linux/sunrpc/svc_rdma_pcl.h
+++ b/include/linux/sunrpc/svc_rdma_pcl.h
@@ -97,7 +97,7 @@ pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk)
*/
#define pcl_for_each_segment(pos, chunk) \
for (pos = &(chunk)->ch_segments[0]; \
- pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \
+ pos < &(chunk)->ch_segments[(chunk)->ch_segcount]; \
pos++)
/**
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 5/6] svcrdma: reject Write/Reply chunks with segcount 0
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
` (3 preceding siblings ...)
2026-05-26 13:35 ` [PATCH 4/6] svcrdma: fix pcl_for_each_segment for empty chunks Chuck Lever
@ 2026-05-26 13:35 ` Chuck Lever
2026-05-26 13:36 ` [PATCH 6/6] svcrdma: Validate Read chunk positions at decode time Chuck Lever
2026-05-27 15:19 ` [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Jeff Layton
6 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2026-05-26 13:35 UTC (permalink / raw)
To: Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chris Mason, Chuck Lever
From: Chris Mason <clm@meta.com>
A peer can send a Write or Reply chunk whose segcount field is zero.
xdr_check_write_chunk() only rejects segcount > rc_maxpages, so zero
passes the range check, and xdr_inline_decode(stream, 0) returns the
current (non-NULL) cursor without advancing. The function returns
true and pcl_alloc_write() then links a struct svc_rdma_chunk with
ch_segcount == 0 onto rc_write_pcl or rc_reply_pcl.
An earlier patch in this series made pcl_for_each_segment() safe for
ch_segcount == 0, so this no longer drives the memory walk it used
to. Rejecting the malformed frame at the decode boundary is still
worthwhile as defense in depth: it keeps degenerate zero-segment
chunks off the parsed chunk lists entirely, so any future consumer
that walks ch_segments directly cannot observe one, and it makes the
zero-floor easy to backport to trees where the macro change is more
intrusive. RFC 8166 has no meaning for a Write/Reply chunk that
describes no remote buffer, so no legitimate client is affected.
xdr_check_reply_chunk() funnels Reply chunks through
xdr_check_write_chunk() and inherits the same rejection.
pcl_alloc_write() also links each chunk onto the parsed chunk list
before filling its segment array. If a future change weakens the
segcount-0 rejection, an incomplete chunk is visible to consumers
during the fill loop. Reorder so that list_add_tail() follows the
segment fill loop, ensuring only fully-populated chunks appear on
the list.
Fixes: 78147ca8b4a9 ("svcrdma: Add a "parsed chunk list" data structure")
Assisted-by: kres (claude-opus-4-7)
Signed-off-by: Chris Mason <clm@meta.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
net/sunrpc/xprtrdma/svc_rdma_pcl.c | 2 +-
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 9 ++++++---
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_pcl.c b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
index 1f8f7dad8b6f..18d1045799ce 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_pcl.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
@@ -213,7 +213,6 @@ bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
chunk = pcl_alloc_chunk(segcount, 0);
if (!chunk)
return false;
- list_add_tail(&chunk->ch_list, &pcl->cl_chunks);
for (j = 0; j < segcount; j++) {
segment = &chunk->ch_segments[j];
@@ -225,6 +224,7 @@ bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
chunk->ch_length += segment->rs_length;
chunk->ch_segcount++;
}
+ list_add_tail(&chunk->ch_list, &pcl->cl_chunks);
}
return true;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 15c1d8ae5301..f6a7533a7555 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -510,10 +510,13 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
return false;
/* Before trusting the segcount value enough to use it in
- * a computation, perform a simple range check. This is an
- * arbitrary but sensible limit (ie, not architectural).
+ * a computation, perform a simple range check. A zero
+ * segcount describes no remote buffer and is rejected so
+ * downstream consumers never see a degenerate ch_segcount==0
+ * chunk. The upper bound is an arbitrary but sensible limit
+ * (ie, not architectural).
*/
- if (unlikely(segcount > rctxt->rc_maxpages))
+ if (segcount == 0 || unlikely(segcount > rctxt->rc_maxpages))
return false;
p = xdr_inline_decode(&rctxt->rc_stream,
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH 6/6] svcrdma: Validate Read chunk positions at decode time
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
` (4 preceding siblings ...)
2026-05-26 13:35 ` [PATCH 5/6] svcrdma: reject Write/Reply chunks with segcount 0 Chuck Lever
@ 2026-05-26 13:36 ` Chuck Lever
2026-05-27 15:19 ` [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Jeff Layton
6 siblings, 0 replies; 8+ messages in thread
From: Chuck Lever @ 2026-05-26 13:36 UTC (permalink / raw)
To: Jeff Layton, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chuck Lever
From: Chuck Lever <chuck.lever@oracle.com>
Read chunk position and length validation is currently scattered
across three consumer functions: svc_rdma_read_data_item(),
svc_rdma_read_multiple_chunks(), and svc_rdma_read_call_chunk().
Each independently guards against the same class of unsigned
arithmetic underflow from untrusted wire values. Any new consumer
of the parsed Read chunk list must replicate these checks or risk
re-introducing the defects fixed by earlier patches in this series.
Add pcl_check_read_chunk_positions() to consolidate position and
length validation into a single post-decode pass, called from
svc_rdma_xdr_decode_req() after all three chunk lists have been
parsed and the inline body length is known. The pass verifies
three properties:
- Each Read chunk's inline-body offset (its unreduced-stream
position minus the cumulative length of preceding Read chunks)
falls within the inline body length, or within the Call chunk
length for interleaved reads.
- Adjacent Read chunk positions do not overlap: cumulative read
bytes at each transition do not exceed the next position.
- Each chunk length does not exceed the receive context's page
budget.
Malformed frames are rejected before reaching any consumer. The
existing consumer-side guards remain as defense in depth.
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
include/linux/sunrpc/svc_rdma_pcl.h | 2 ++
net/sunrpc/xprtrdma/svc_rdma_pcl.c | 61 +++++++++++++++++++++++++++++++--
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 3 ++
3 files changed, 63 insertions(+), 3 deletions(-)
diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h
index 655681cf8fed..6346d8cf2587 100644
--- a/include/linux/sunrpc/svc_rdma_pcl.h
+++ b/include/linux/sunrpc/svc_rdma_pcl.h
@@ -119,6 +119,8 @@ extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p);
extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p);
extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
struct svc_rdma_pcl *pcl, __be32 *p);
+extern bool pcl_check_read_chunk_positions(struct svc_rdma_recv_ctxt *rctxt,
+ unsigned int inline_len);
extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl,
const struct xdr_buf *xdr,
int (*actor)(const struct xdr_buf *,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_pcl.c b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
index 18d1045799ce..8623722790f2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_pcl.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
@@ -149,9 +149,6 @@ bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
* cl_count is updated to be the number of chunks (ie.
* unique position values) in the Read list.
* %false: Memory allocation failed.
- *
- * TODO:
- * - Check for chunk range overlaps
*/
bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
{
@@ -229,6 +226,64 @@ bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
return true;
}
+/**
+ * pcl_check_read_chunk_positions - Validate Read chunk positions
+ * @rctxt: Ingress receive context with populated chunk lists
+ * @inline_len: Length of the inline RPC body after the transport header
+ *
+ * Read chunk positions are offsets in the unreduced XDR stream
+ * (RFC 8166 Section 3.4.4), so each position includes the
+ * cumulative length of preceding Read chunks. This function
+ * subtracts those lengths to recover the inline-body offset
+ * before comparing against @inline_len or the Call chunk length.
+ *
+ * Rejects frames where a Read chunk's inline-body offset exceeds
+ * the bound, where adjacent Read chunks overlap, or where any
+ * single chunk length exceeds the page budget.
+ *
+ * Return values:
+ * %true: Read chunk positions and lengths are valid
+ * %false: Malformed chunk list detected
+ */
+bool pcl_check_read_chunk_positions(struct svc_rdma_recv_ctxt *rctxt,
+ unsigned int inline_len)
+{
+ unsigned int max_len, bound, total_read;
+ struct svc_rdma_chunk *chunk, *next;
+
+ max_len = rctxt->rc_maxpages << PAGE_SHIFT;
+
+ if (!pcl_is_empty(&rctxt->rc_call_pcl)) {
+ chunk = pcl_first_chunk(&rctxt->rc_call_pcl);
+ if (chunk->ch_length > max_len)
+ return false;
+ bound = chunk->ch_length;
+ } else {
+ bound = inline_len;
+ }
+
+ if (pcl_is_empty(&rctxt->rc_read_pcl))
+ return true;
+
+ total_read = 0;
+ pcl_for_each_chunk(chunk, &rctxt->rc_read_pcl) {
+ if (chunk->ch_position - total_read > bound)
+ return false;
+ if (chunk->ch_length > max_len)
+ return false;
+
+ next = pcl_next_chunk(&rctxt->rc_read_pcl, chunk);
+ if (!next)
+ break;
+
+ if (chunk->ch_position + chunk->ch_length > next->ch_position)
+ return false;
+ total_read += chunk->ch_length;
+ }
+
+ return true;
+}
+
static int pcl_process_region(const struct xdr_buf *xdr,
unsigned int offset, unsigned int length,
int (*actor)(const struct xdr_buf *, void *),
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index f6a7533a7555..d64b5f78ce8a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -724,6 +724,9 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
rq_arg->head[0].iov_base = rctxt->rc_stream.p;
hdr_len = xdr_stream_pos(&rctxt->rc_stream);
+ if (!pcl_check_read_chunk_positions(rctxt,
+ rq_arg->head[0].iov_len - hdr_len))
+ goto out_inval;
rq_arg->head[0].iov_len -= hdr_len;
rq_arg->len -= hdr_len;
trace_svcrdma_decode_rqst(rctxt, rdma_argp, hdr_len);
--
2.54.0
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values
2026-05-26 13:35 [PATCH 0/6] svcrdma: harden parsed chunk list against malformed wire values Chuck Lever
` (5 preceding siblings ...)
2026-05-26 13:36 ` [PATCH 6/6] svcrdma: Validate Read chunk positions at decode time Chuck Lever
@ 2026-05-27 15:19 ` Jeff Layton
6 siblings, 0 replies; 8+ messages in thread
From: Jeff Layton @ 2026-05-27 15:19 UTC (permalink / raw)
To: Chuck Lever, NeilBrown, Olga Kornievskaia, Dai Ngo, Tom Talpey
Cc: linux-rdma, linux-nfs, Chuck Lever, Chris Mason
On Tue, 2026-05-26 at 09:35 -0400, Chuck Lever wrote:
> The RPC/RDMA transport stores wire-supplied chunk positions, lengths,
> and segment counts verbatim in the parsed chunk list. Consumer
> functions compute gap lengths and sub-range offsets from these values
> using unsigned subtraction. A malicious or buggy peer can supply
> values that cause these subtractions to underflow, exposing slab
> memory to the Reply channel or driving oversized allocations.
>
> The fix proceeds in two layers. Consumer functions in svc_rdma_rw.c
> gain bounds checks against the saved inline body length before each
> unsigned subtraction, closing the immediate underflow paths. The
> Read list decoder in svc_rdma_recvfrom.c gains a segment-length cap
> against the receive context's page budget, and the existing page-
> overrun guard in svc_rdma_build_read_segment() is corrected to
> release the rw context it has already acquired. These consumer-side
> fixes are backportable independently.
>
> pcl_for_each_segment() uses an inclusive upper bound that underflows
> when ch_segcount is zero, turning a zero-segment Write or Reply
> chunk into an unbounded memory walk. The macro is changed to a
> half-open bound that naturally produces an empty iteration for
> ch_segcount == 0. The decoder then also rejects zero-segment chunks
> at the wire boundary, and reorders pcl_alloc_write() so that only
> fully-populated chunks appear on the list. The macro fix remains as
> defense in depth and is safe to backport to trees without the decoder
> change.
>
> A consolidation pass validates Read chunk positions and overlap
> invariants once, immediately after decoding, so that future PCL
> consumers inherit the guarantee without replicating per-site checks.
>
> ---
> Chris Mason (3):
> svcrdma: Fix offset arithmetic in read_chunk_range
> svcrdma: fix pcl_for_each_segment for empty chunks
> svcrdma: reject Write/Reply chunks with segcount 0
>
> Chuck Lever (3):
> svcrdma: validate Read chunk positions before reconstruction
> svcrdma: reject oversized Read segments at decode time
> svcrdma: Validate Read chunk positions at decode time
>
> include/linux/sunrpc/svc_rdma_pcl.h | 4 ++-
> net/sunrpc/xprtrdma/svc_rdma_pcl.c | 63 ++++++++++++++++++++++++++++++---
> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 14 ++++++--
> net/sunrpc/xprtrdma/svc_rdma_rw.c | 52 ++++++++++++++++++++-------
> 4 files changed, 113 insertions(+), 20 deletions(-)
> ---
> base-commit: 887d478bb2115cec0be8caae58bad4d4b3109b1a
> change-id: 20260524-rpc-kernel-bugs-fb537a0615ec
>
> Best regards,
> --
> Chuck Lever <chuck.lever@oracle.com>
Acked-by: Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 8+ messages in thread