* [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser
@ 2026-04-21 10:03 Werner Kasselman
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
2026-04-21 10:03 ` [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology Werner Kasselman
0 siblings, 2 replies; 3+ messages in thread
From: Werner Kasselman @ 2026-04-21 10:03 UTC (permalink / raw)
To: Trond Myklebust, Anna Schumaker
Cc: Christoph Hellwig, linux-nfs@vger.kernel.org,
linux-kernel@vger.kernel.org, stable@vger.kernel.org,
Werner Kasselman
The recursive-descent volume parser in fs/nfs/blocklayout/dev.c has
three problems reachable from a malicious NFS server:
- Server-supplied volume indices are used without bounds checking,
causing an OOB heap read at volumes[idx].type.
- The mutual recursion between bl_parse_deviceid and the type-specific
parsers has no depth limit, so a cyclic or deeply chained topology
overflows the kernel stack.
- When nr_volumes is 0, the entry point computes nr_volumes - 1 as the
starting index, underflowing to -1.
Patch 1 fixes the memory-safety issues: index validation, depth cap,
and nr_volumes == 0 rejection.
Patch 2 adds a total parse-operation budget (PNFS_BLOCK_MAX_PARSE_OPS)
to prevent resource exhaustion from DAG-shaped topologies where shared
child references cause exponential tree materialization.
A standalone test exercising all three bug classes and the fixes is at:
tools/testing/pnfs-blocklayout/test-volume-parser.c
Werner Kasselman (2):
pnfs/blocklayout: validate volume indices and limit recursion depth
pnfs/blocklayout: cap total parse operations in volume topology
fs/nfs/blocklayout/blocklayout.h | 2 ++
fs/nfs/blocklayout/dev.c | 61 ++++++++++++++++++++++++--------
2 files changed, 49 insertions(+), 14 deletions(-)
--
2.43.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
@ 2026-04-21 10:03 ` Werner Kasselman
2026-04-21 10:03 ` [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology Werner Kasselman
1 sibling, 0 replies; 3+ messages in thread
From: Werner Kasselman @ 2026-04-21 10:03 UTC (permalink / raw)
To: Trond Myklebust, Anna Schumaker
Cc: Christoph Hellwig, linux-nfs@vger.kernel.org,
linux-kernel@vger.kernel.org, stable@vger.kernel.org,
Werner Kasselman
bl_parse_deviceid() trusts server-supplied volume indices from XDR
without bounds-checking and recurses with no depth limit. A malicious
server can trigger an OOB heap read or overflow the kernel stack via
crafted GETDEVICEINFO responses.
Validate that every child volume index falls within the allocated
volumes array, reject nr_volumes == 0 before computing the entry-point
index, and cap recursion at PNFS_BLOCK_MAX_DEPTH (16).
Found by static analysis (sqry).
Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing")
Cc: stable@vger.kernel.org # 3.17+
Signed-off-by: Werner Kasselman <werner@verivus.com>
---
fs/nfs/blocklayout/blocklayout.h | 1 +
fs/nfs/blocklayout/dev.c | 54 +++++++++++++++++++++++---------
2 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 6da40ca19570..ec8917cc335d 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -48,6 +48,7 @@ struct pnfs_block_dev;
#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64
+#define PNFS_BLOCK_MAX_DEPTH 16
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index cc6327d97a91..d9b1af863535 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -287,7 +287,8 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask);
static int
@@ -439,12 +440,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
int ret;
- ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
+ ret = bl_parse_deviceid(server, d, volumes, nr_volumes,
+ v->slice.volume, depth + 1, gfp_mask);
if (ret)
return ret;
@@ -455,7 +458,8 @@ bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -467,8 +471,9 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
return -ENOMEM;
for (i = 0; i < v->concat.volumes_count; i++) {
- ret = bl_parse_deviceid(server, &d->children[i],
- volumes, v->concat.volumes[i], gfp_mask);
+ ret = bl_parse_deviceid(server, &d->children[i], volumes,
+ nr_volumes, v->concat.volumes[i],
+ depth + 1, gfp_mask);
if (ret)
return ret;
@@ -484,7 +489,8 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -496,8 +502,9 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
return -ENOMEM;
for (i = 0; i < v->stripe.volumes_count; i++) {
- ret = bl_parse_deviceid(server, &d->children[i],
- volumes, v->stripe.volumes[i], gfp_mask);
+ ret = bl_parse_deviceid(server, &d->children[i], volumes,
+ nr_volumes, v->stripe.volumes[i],
+ depth + 1, gfp_mask);
if (ret)
return ret;
@@ -513,19 +520,34 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
+ if (idx < 0 || idx >= nr_volumes) {
+ dprintk("volume index %d out of range (0..%d)\n",
+ idx, nr_volumes - 1);
+ return -EIO;
+ }
+
+ if (depth >= PNFS_BLOCK_MAX_DEPTH) {
+ dprintk("volume nesting too deep (%d)\n", depth);
+ return -EIO;
+ }
+
d->type = volumes[idx].type;
switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
- return bl_parse_slice(server, d, volumes, idx, gfp_mask);
+ return bl_parse_slice(server, d, volumes, nr_volumes,
+ idx, depth, gfp_mask);
case PNFS_BLOCK_VOLUME_CONCAT:
- return bl_parse_concat(server, d, volumes, idx, gfp_mask);
+ return bl_parse_concat(server, d, volumes, nr_volumes,
+ idx, depth, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
- return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+ return bl_parse_stripe(server, d, volumes, nr_volumes,
+ idx, depth, gfp_mask);
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
@@ -559,6 +581,9 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_scratch;
nr_volumes = be32_to_cpup(p++);
+ if (nr_volumes <= 0)
+ goto out_free_scratch;
+
volumes = kzalloc_objs(struct pnfs_block_volume, nr_volumes, gfp_mask);
if (!volumes)
goto out_free_scratch;
@@ -573,7 +598,8 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
if (!top)
goto out_free_volumes;
- ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
+ ret = bl_parse_deviceid(server, top, volumes, nr_volumes,
+ nr_volumes - 1, 0, gfp_mask);
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
@ 2026-04-21 10:03 ` Werner Kasselman
1 sibling, 0 replies; 3+ messages in thread
From: Werner Kasselman @ 2026-04-21 10:03 UTC (permalink / raw)
To: Trond Myklebust, Anna Schumaker
Cc: Christoph Hellwig, linux-nfs@vger.kernel.org,
linux-kernel@vger.kernel.org, stable@vger.kernel.org,
Werner Kasselman
The recursive-descent volume parser materializes a separate device
tree node for every volume reference. When CONCAT or STRIPE volumes
reference the same child index, the parser re-parses that subtree for
each reference, causing work exponential in nesting depth.
Cap the total number of bl_parse_deviceid() calls at
PNFS_BLOCK_MAX_PARSE_OPS (1024) to bound CPU and memory consumption
from server-controlled GETDEVICEINFO topologies.
Signed-off-by: Werner Kasselman <werner@verivus.com>
---
fs/nfs/blocklayout/blocklayout.h | 1 +
fs/nfs/blocklayout/dev.c | 31 +++++++++++++++++++------------
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index ec8917cc335d..6c00d98d4317 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -49,6 +49,7 @@ struct pnfs_block_dev;
#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64
#define PNFS_BLOCK_MAX_DEPTH 16
+#define PNFS_BLOCK_MAX_PARSE_OPS 1024
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index d9b1af863535..6e0df65c9b1f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -288,7 +288,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask);
+ int depth, int *remaining, gfp_t gfp_mask);
static int
@@ -441,13 +441,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
int ret;
ret = bl_parse_deviceid(server, d, volumes, nr_volumes,
- v->slice.volume, depth + 1, gfp_mask);
+ v->slice.volume, depth + 1, remaining,
+ gfp_mask);
if (ret)
return ret;
@@ -459,7 +460,7 @@ bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -473,7 +474,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
for (i = 0; i < v->concat.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i], volumes,
nr_volumes, v->concat.volumes[i],
- depth + 1, gfp_mask);
+ depth + 1, remaining, gfp_mask);
if (ret)
return ret;
@@ -490,7 +491,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -504,7 +505,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
for (i = 0; i < v->stripe.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i], volumes,
nr_volumes, v->stripe.volumes[i],
- depth + 1, gfp_mask);
+ depth + 1, remaining, gfp_mask);
if (ret)
return ret;
@@ -521,7 +522,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
if (idx < 0 || idx >= nr_volumes) {
dprintk("volume index %d out of range (0..%d)\n",
@@ -534,6 +535,11 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return -EIO;
}
+ if (--(*remaining) < 0) {
+ dprintk("volume topology too complex\n");
+ return -EIO;
+ }
+
d->type = volumes[idx].type;
switch (d->type) {
@@ -541,13 +547,13 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
return bl_parse_slice(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_CONCAT:
return bl_parse_concat(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
@@ -567,6 +573,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct xdr_buf buf;
struct folio *scratch;
int nr_volumes, ret, i;
+ int remaining = PNFS_BLOCK_MAX_PARSE_OPS;
__be32 *p;
scratch = folio_alloc(gfp_mask, 0);
@@ -599,7 +606,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_volumes;
ret = bl_parse_deviceid(server, top, volumes, nr_volumes,
- nr_volumes - 1, 0, gfp_mask);
+ nr_volumes - 1, 0, &remaining, gfp_mask);
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
--
2.43.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2026-04-21 10:03 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
2026-04-21 10:03 ` [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology Werner Kasselman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox