* [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
@ 2026-04-21 10:03 ` Werner Kasselman
2026-04-23 5:15 ` Christoph Hellwig
2026-04-21 10:03 ` [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology Werner Kasselman
2026-04-23 5:20 ` [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Christoph Hellwig
2 siblings, 1 reply; 6+ messages in thread
From: Werner Kasselman @ 2026-04-21 10:03 UTC (permalink / raw)
To: Trond Myklebust, Anna Schumaker
Cc: Christoph Hellwig, linux-nfs@vger.kernel.org,
linux-kernel@vger.kernel.org, stable@vger.kernel.org,
Werner Kasselman
bl_parse_deviceid() trusts server-supplied volume indices from XDR
without bounds-checking and recurses with no depth limit. A malicious
server can trigger an OOB heap read or overflow the kernel stack via
crafted GETDEVICEINFO responses.
Validate that every child volume index falls within the allocated
volumes array, reject nr_volumes == 0 before computing the entry-point
index, and cap recursion at PNFS_BLOCK_MAX_DEPTH (16).
Found by static analysis (sqry).
Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing")
Cc: stable@vger.kernel.org # 3.17+
Signed-off-by: Werner Kasselman <werner@verivus.com>
---
fs/nfs/blocklayout/blocklayout.h | 1 +
fs/nfs/blocklayout/dev.c | 54 +++++++++++++++++++++++---------
2 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 6da40ca19570..ec8917cc335d 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -48,6 +48,7 @@ struct pnfs_block_dev;
#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64
+#define PNFS_BLOCK_MAX_DEPTH 16
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index cc6327d97a91..d9b1af863535 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -287,7 +287,8 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask);
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask);
static int
@@ -439,12 +440,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
int ret;
- ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask);
+ ret = bl_parse_deviceid(server, d, volumes, nr_volumes,
+ v->slice.volume, depth + 1, gfp_mask);
if (ret)
return ret;
@@ -455,7 +458,8 @@ bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -467,8 +471,9 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
return -ENOMEM;
for (i = 0; i < v->concat.volumes_count; i++) {
- ret = bl_parse_deviceid(server, &d->children[i],
- volumes, v->concat.volumes[i], gfp_mask);
+ ret = bl_parse_deviceid(server, &d->children[i], volumes,
+ nr_volumes, v->concat.volumes[i],
+ depth + 1, gfp_mask);
if (ret)
return ret;
@@ -484,7 +489,8 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -496,8 +502,9 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
return -ENOMEM;
for (i = 0; i < v->stripe.volumes_count; i++) {
- ret = bl_parse_deviceid(server, &d->children[i],
- volumes, v->stripe.volumes[i], gfp_mask);
+ ret = bl_parse_deviceid(server, &d->children[i], volumes,
+ nr_volumes, v->stripe.volumes[i],
+ depth + 1, gfp_mask);
if (ret)
return ret;
@@ -513,19 +520,34 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
- struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+ struct pnfs_block_volume *volumes, int nr_volumes, int idx,
+ int depth, gfp_t gfp_mask)
{
+ if (idx < 0 || idx >= nr_volumes) {
+ dprintk("volume index %d out of range (0..%d)\n",
+ idx, nr_volumes - 1);
+ return -EIO;
+ }
+
+ if (depth >= PNFS_BLOCK_MAX_DEPTH) {
+ dprintk("volume nesting too deep (%d)\n", depth);
+ return -EIO;
+ }
+
d->type = volumes[idx].type;
switch (d->type) {
case PNFS_BLOCK_VOLUME_SIMPLE:
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
- return bl_parse_slice(server, d, volumes, idx, gfp_mask);
+ return bl_parse_slice(server, d, volumes, nr_volumes,
+ idx, depth, gfp_mask);
case PNFS_BLOCK_VOLUME_CONCAT:
- return bl_parse_concat(server, d, volumes, idx, gfp_mask);
+ return bl_parse_concat(server, d, volumes, nr_volumes,
+ idx, depth, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
- return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+ return bl_parse_stripe(server, d, volumes, nr_volumes,
+ idx, depth, gfp_mask);
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
@@ -559,6 +581,9 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_scratch;
nr_volumes = be32_to_cpup(p++);
+ if (nr_volumes <= 0)
+ goto out_free_scratch;
+
volumes = kzalloc_objs(struct pnfs_block_volume, nr_volumes, gfp_mask);
if (!volumes)
goto out_free_scratch;
@@ -573,7 +598,8 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
if (!top)
goto out_free_volumes;
- ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask);
+ ret = bl_parse_deviceid(server, top, volumes, nr_volumes,
+ nr_volumes - 1, 0, gfp_mask);
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
--
2.43.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
@ 2026-04-23 5:15 ` Christoph Hellwig
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2026-04-23 5:15 UTC (permalink / raw)
To: Werner Kasselman
Cc: Trond Myklebust, Anna Schumaker, Christoph Hellwig,
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
stable@vger.kernel.org
On Tue, Apr 21, 2026 at 10:03:42AM +0000, Werner Kasselman wrote:
> #define PNFS_BLOCK_MAX_UUIDS 4
> #define PNFS_BLOCK_MAX_DEVICES 64
> +#define PNFS_BLOCK_MAX_DEPTH 16
I think we can and should reduce the nesting depth. The only really
useful nesting is mirroring + striping or concatenation. Giving a little
extra slack is fine, but I think 4 (or 8 if you insist) should be
enough,
> + int depth, gfp_t gfp_mask);
unsigned?
> default:
> @@ -559,6 +581,9 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
> goto out_free_scratch;
> nr_volumes = be32_to_cpup(p++);
>
> + if (nr_volumes <= 0)
> + goto out_free_scratch;
nr_volumes should be siwtched to an unsigned value, as it is over
the wire.
Otherwise looks good, thanks a lot!
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
@ 2026-04-21 10:03 ` Werner Kasselman
2026-04-23 5:18 ` Christoph Hellwig
2026-04-23 5:20 ` [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Christoph Hellwig
2 siblings, 1 reply; 6+ messages in thread
From: Werner Kasselman @ 2026-04-21 10:03 UTC (permalink / raw)
To: Trond Myklebust, Anna Schumaker
Cc: Christoph Hellwig, linux-nfs@vger.kernel.org,
linux-kernel@vger.kernel.org, stable@vger.kernel.org,
Werner Kasselman
The recursive-descent volume parser materializes a separate device
tree node for every volume reference. When CONCAT or STRIPE volumes
reference the same child index, the parser re-parses that subtree for
each reference, causing work exponential in nesting depth.
Cap the total number of bl_parse_deviceid() calls at
PNFS_BLOCK_MAX_PARSE_OPS (1024) to bound CPU and memory consumption
from server-controlled GETDEVICEINFO topologies.
Signed-off-by: Werner Kasselman <werner@verivus.com>
---
fs/nfs/blocklayout/blocklayout.h | 1 +
fs/nfs/blocklayout/dev.c | 31 +++++++++++++++++++------------
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index ec8917cc335d..6c00d98d4317 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -49,6 +49,7 @@ struct pnfs_block_dev;
#define PNFS_BLOCK_MAX_UUIDS 4
#define PNFS_BLOCK_MAX_DEVICES 64
#define PNFS_BLOCK_MAX_DEPTH 16
+#define PNFS_BLOCK_MAX_PARSE_OPS 1024
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index d9b1af863535..6e0df65c9b1f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -288,7 +288,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask);
+ int depth, int *remaining, gfp_t gfp_mask);
static int
@@ -441,13 +441,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
int ret;
ret = bl_parse_deviceid(server, d, volumes, nr_volumes,
- v->slice.volume, depth + 1, gfp_mask);
+ v->slice.volume, depth + 1, remaining,
+ gfp_mask);
if (ret)
return ret;
@@ -459,7 +460,7 @@ bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -473,7 +474,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
for (i = 0; i < v->concat.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i], volumes,
nr_volumes, v->concat.volumes[i],
- depth + 1, gfp_mask);
+ depth + 1, remaining, gfp_mask);
if (ret)
return ret;
@@ -490,7 +491,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
struct pnfs_block_volume *v = &volumes[idx];
u64 len = 0;
@@ -504,7 +505,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
for (i = 0; i < v->stripe.volumes_count; i++) {
ret = bl_parse_deviceid(server, &d->children[i], volumes,
nr_volumes, v->stripe.volumes[i],
- depth + 1, gfp_mask);
+ depth + 1, remaining, gfp_mask);
if (ret)
return ret;
@@ -521,7 +522,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
static int
bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int nr_volumes, int idx,
- int depth, gfp_t gfp_mask)
+ int depth, int *remaining, gfp_t gfp_mask)
{
if (idx < 0 || idx >= nr_volumes) {
dprintk("volume index %d out of range (0..%d)\n",
@@ -534,6 +535,11 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return -EIO;
}
+ if (--(*remaining) < 0) {
+ dprintk("volume topology too complex\n");
+ return -EIO;
+ }
+
d->type = volumes[idx].type;
switch (d->type) {
@@ -541,13 +547,13 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return bl_parse_simple(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_SLICE:
return bl_parse_slice(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_CONCAT:
return bl_parse_concat(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, nr_volumes,
- idx, depth, gfp_mask);
+ idx, depth, remaining, gfp_mask);
case PNFS_BLOCK_VOLUME_SCSI:
return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
@@ -567,6 +573,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
struct xdr_buf buf;
struct folio *scratch;
int nr_volumes, ret, i;
+ int remaining = PNFS_BLOCK_MAX_PARSE_OPS;
__be32 *p;
scratch = folio_alloc(gfp_mask, 0);
@@ -599,7 +606,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_free_volumes;
ret = bl_parse_deviceid(server, top, volumes, nr_volumes,
- nr_volumes - 1, 0, gfp_mask);
+ nr_volumes - 1, 0, &remaining, gfp_mask);
node = &top->node;
nfs4_init_deviceid_node(node, server, &pdev->dev_id);
--
2.43.0
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology
2026-04-21 10:03 ` [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology Werner Kasselman
@ 2026-04-23 5:18 ` Christoph Hellwig
0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2026-04-23 5:18 UTC (permalink / raw)
To: Werner Kasselman
Cc: Trond Myklebust, Anna Schumaker, Christoph Hellwig,
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
stable@vger.kernel.org
On Tue, Apr 21, 2026 at 10:03:44AM +0000, Werner Kasselman wrote:
> The recursive-descent volume parser materializes a separate device
> tree node for every volume reference. When CONCAT or STRIPE volumes
> reference the same child index, the parser re-parses that subtree for
> each reference, causing work exponential in nesting depth.
>
> Cap the total number of bl_parse_deviceid() calls at
> PNFS_BLOCK_MAX_PARSE_OPS (1024) to bound CPU and memory consumption
> from server-controlled GETDEVICEINFO topologies.
The OPS naming is a bit odd, these are called 'volumes' in the specs.
Which isn't a great name, but it generally helps to stick to the
spec terms. So maybe rename the constant, and also add a comment
explaining the limit to the code?
> + int depth, int *remaining, gfp_t gfp_mask);
Also use unsigned here as well. And maybe we should group the depth
and ops into a struct instead of adding more and more parameters?
Otherwise this looks good.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
2026-04-21 10:03 ` [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology Werner Kasselman
@ 2026-04-23 5:20 ` Christoph Hellwig
2 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2026-04-23 5:20 UTC (permalink / raw)
To: Werner Kasselman
Cc: Trond Myklebust, Anna Schumaker, Christoph Hellwig,
linux-nfs@vger.kernel.org, linux-kernel@vger.kernel.org,
stable@vger.kernel.org
On Tue, Apr 21, 2026 at 10:03:40AM +0000, Werner Kasselman wrote:
> A standalone test exercising all three bug classes and the fixes is at:
> tools/testing/pnfs-blocklayout/test-volume-parser.c
Not really, but maybe you wanted to send it out? :)
^ permalink raw reply [flat|nested] 6+ messages in thread