[PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology

public inbox for stable@vger.kernel.org
 help / color / mirror / Atom feed

From: Werner Kasselman <werner@verivus.ai>
To: Trond Myklebust <trondmy@kernel.org>, Anna Schumaker <anna@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>,
	"linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"stable@vger.kernel.org" <stable@vger.kernel.org>,
	Werner Kasselman <werner@verivus.ai>
Subject: [PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology
Date: Tue, 21 Apr 2026 10:03:44 +0000	[thread overview]
Message-ID: <20260421100338.1227152-3-werner@verivus.com> (raw)
In-Reply-To: <20260421100338.1227152-1-werner@verivus.com>

The recursive-descent volume parser materializes a separate device
tree node for every volume reference.  When CONCAT or STRIPE volumes
reference the same child index, the parser re-parses that subtree once
per reference, so the work grows exponentially with nesting depth.

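Cap the total number of bl_parse_deviceid() calls made on behalf of a
single bl_alloc_deviceid_node() invocation at PNFS_BLOCK_MAX_PARSE_OPS
(1024) to bound CPU and memory consumption from server-controlled
GETDEVICEINFO topologies.  The budget is a single counter passed down
the recursion by pointer, so it is shared by the whole tree; once it is
exhausted, parsing fails with -EIO.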

Signed-off-by: Werner Kasselman <werner@verivus.com>
---
 fs/nfs/blocklayout/blocklayout.h |  1 +
 fs/nfs/blocklayout/dev.c         | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index ec8917cc335d..6c00d98d4317 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -49,6 +49,7 @@ struct pnfs_block_dev;
 #define PNFS_BLOCK_MAX_UUIDS	4
 #define PNFS_BLOCK_MAX_DEVICES	64
 #define PNFS_BLOCK_MAX_DEPTH	16
+#define PNFS_BLOCK_MAX_PARSE_OPS 1024
 
 /*
  * Random upper cap for the uuid length to avoid unbounded allocation.
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index d9b1af863535..6e0df65c9b1f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -288,7 +288,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
 static int
 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int nr_volumes, int idx,
-		int depth, gfp_t gfp_mask);
+		int depth, int *remaining, gfp_t gfp_mask);
 
 
 static int
@@ -441,13 +441,14 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
 static int
 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int nr_volumes, int idx,
-		int depth, gfp_t gfp_mask)
+		int depth, int *remaining, gfp_t gfp_mask)
 {
 	struct pnfs_block_volume *v = &volumes[idx];
 	int ret;
 
 	ret = bl_parse_deviceid(server, d, volumes, nr_volumes,
-				v->slice.volume, depth + 1, gfp_mask);
+				v->slice.volume, depth + 1, remaining,
+				gfp_mask);
 	if (ret)
 		return ret;
 
@@ -459,7 +460,7 @@ bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
 static int
 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int nr_volumes, int idx,
-		int depth, gfp_t gfp_mask)
+		int depth, int *remaining, gfp_t gfp_mask)
 {
 	struct pnfs_block_volume *v = &volumes[idx];
 	u64 len = 0;
@@ -473,7 +474,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
 	for (i = 0; i < v->concat.volumes_count; i++) {
 		ret = bl_parse_deviceid(server, &d->children[i], volumes,
 				nr_volumes, v->concat.volumes[i],
-				depth + 1, gfp_mask);
+				depth + 1, remaining, gfp_mask);
 		if (ret)
 			return ret;
 
@@ -490,7 +491,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d,
 static int
 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int nr_volumes, int idx,
-		int depth, gfp_t gfp_mask)
+		int depth, int *remaining, gfp_t gfp_mask)
 {
 	struct pnfs_block_volume *v = &volumes[idx];
 	u64 len = 0;
@@ -504,7 +505,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
 	for (i = 0; i < v->stripe.volumes_count; i++) {
 		ret = bl_parse_deviceid(server, &d->children[i], volumes,
 				nr_volumes, v->stripe.volumes[i],
-				depth + 1, gfp_mask);
+				depth + 1, remaining, gfp_mask);
 		if (ret)
 			return ret;
 
@@ -521,7 +522,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d,
 static int
 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 		struct pnfs_block_volume *volumes, int nr_volumes, int idx,
-		int depth, gfp_t gfp_mask)
+		int depth, int *remaining, gfp_t gfp_mask)
 {
 	if (idx < 0 || idx >= nr_volumes) {
 		dprintk("volume index %d out of range (0..%d)\n",
@@ -534,6 +535,11 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 		return -EIO;
 	}
 
+	if (--(*remaining) < 0) {
+		dprintk("volume topology too complex\n");
+		return -EIO;
+	}
+
 	d->type = volumes[idx].type;
 
 	switch (d->type) {
@@ -541,13 +547,13 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
 		return bl_parse_simple(server, d, volumes, idx, gfp_mask);
 	case PNFS_BLOCK_VOLUME_SLICE:
 		return bl_parse_slice(server, d, volumes, nr_volumes,
-				idx, depth, gfp_mask);
+				idx, depth, remaining, gfp_mask);
 	case PNFS_BLOCK_VOLUME_CONCAT:
 		return bl_parse_concat(server, d, volumes, nr_volumes,
-				idx, depth, gfp_mask);
+				idx, depth, remaining, gfp_mask);
 	case PNFS_BLOCK_VOLUME_STRIPE:
 		return bl_parse_stripe(server, d, volumes, nr_volumes,
-				idx, depth, gfp_mask);
+				idx, depth, remaining, gfp_mask);
 	case PNFS_BLOCK_VOLUME_SCSI:
 		return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
 	default:
@@ -567,6 +573,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 	struct xdr_buf buf;
 	struct folio *scratch;
 	int nr_volumes, ret, i;
+	int remaining = PNFS_BLOCK_MAX_PARSE_OPS;
 	__be32 *p;
 
 	scratch = folio_alloc(gfp_mask, 0);
@@ -599,7 +606,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
 		goto out_free_volumes;
 
 	ret = bl_parse_deviceid(server, top, volumes, nr_volumes,
-				nr_volumes - 1, 0, gfp_mask);
+				nr_volumes - 1, 0, &remaining, gfp_mask);
 
 	node = &top->node;
 	nfs4_init_deviceid_node(node, server, &pdev->dev_id);
-- 
2.43.0

     prev parent reply	other threads:[~2026-04-21 10:03 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-21 10:03 [PATCH 0/2] pnfs/blocklayout: harden GETDEVICEINFO volume parser Werner Kasselman
2026-04-21 10:03 ` [PATCH 1/2] pnfs/blocklayout: validate volume indices and limit recursion depth Werner Kasselman
2026-04-21 10:03 ` Werner Kasselman [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:ec8917cc335 dfblob:6c00d98d431 dfblob:d9b1af86353
dfblob:6e0df65c9b1 )
 OR (
bs:"[PATCH 2/2] pnfs/blocklayout: cap total parse operations in volume topology" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260421100338.1227152-3-werner@verivus.com \
    --to=werner@verivus.ai \
    --cc=anna@kernel.org \
    --cc=hch@lst.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=trondmy@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.