linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND"
@ 2025-10-02 14:00 Chuck Lever
  2025-10-02 14:00 ` [PATCH v2] svcrdma: Increase the server's default RPC/RDMA credit grant Chuck Lever
  2025-10-02 16:34 ` [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" tianshuo han
  0 siblings, 2 replies; 3+ messages in thread
From: Chuck Lever @ 2025-10-02 14:00 UTC (permalink / raw)
  To: NeilBrown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey
  Cc: linux-nfs, Chuck Lever, tianshuo han

From: Chuck Lever <chuck.lever@oracle.com>

I've found that pynfs COMP6 now leaves the connection or lease in a
strange state, which causes CLOSE9 to hang indefinitely. I've dug
into it a little, but I haven't been able to root-cause it yet.
However, I bisected to commit 48aab1606fa8 ("NFSD: Remove the cap on
number of operations per NFSv4 COMPOUND").

Tianshuo Han also reports a potential vulnerability when decoding
an NFSv4 COMPOUND. An attacker can place an arbitrarily large op
count in the COMPOUND header, which results in:

[   51.410584] nfsd: vmalloc error: size 1209533382144, exceeds total
pages, mode:0xdc0(GFP_KERNEL|__GFP_ZERO),
nodemask=(null),cpuset=/,mems_allowed=0

when NFSD attempts to allocate the COMPOUND op array.

Let's restore the per operation-per-COMPOUND limit, but increased to
200 for now.

Reported-by: tianshuo han <hantianshuo233@gmail.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
X-Cc: stable@vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c  | 14 ++++++++++++--
 fs/nfsd/nfs4state.c |  1 +
 fs/nfsd/nfs4xdr.c   |  4 +++-
 fs/nfsd/nfsd.h      |  3 +++
 fs/nfsd/xdr4.h      |  1 +
 5 files changed, 20 insertions(+), 3 deletions(-)

Changes since v1:
* Patch description updates

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f9aeefc0da73..7f7e6bb23a90 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2893,10 +2893,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 
 	rqstp->rq_lease_breaker = (void **)&cstate->clp;
 
-	trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt);
+	trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
 	while (!status && resp->opcnt < args->opcnt) {
 		op = &args->ops[resp->opcnt++];
 
+		if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
+			/* If there are still more operations to process,
+			 * stop here and report NFS4ERR_RESOURCE. */
+			if (cstate->minorversion == 0 &&
+			    args->client_opcnt > resp->opcnt) {
+				op->status = nfserr_resource;
+				goto encode_op;
+			}
+		}
+
 		/*
 		 * The XDR decode routines may have pre-set op->status;
 		 * for example, if there is a miscellaneous XDR error
@@ -2973,7 +2983,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
 			status = op->status;
 		}
 
-		trace_nfsd_compound_status(args->opcnt, resp->opcnt,
+		trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
 					   status, nfsd4_op_name(op->opnum));
 
 		nfsd4_cstate_clear_replay(cstate);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ad2c45658c46..c9053ef4d79f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3902,6 +3902,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
 	ca->headerpadsz = 0;
 	ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
 	ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
+	ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
 	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
 			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
 	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 6a56dca6fb04..230bf53e39f7 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2488,8 +2488,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 
 	if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
 		return false;
-	if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
+	if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
 		return false;
+	argp->opcnt = min_t(u32, argp->client_opcnt,
+			    NFSD_MAX_OPS_PER_COMPOUND);
 
 	if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
 		argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 6812cd231b1d..8ffed4f0b95f 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -57,6 +57,9 @@ struct readdir_cd {
 	__be32			err;	/* 0, nfserr, or nfserr_eof */
 };
 
+/* Maximum number of operations per session compound */
+#define NFSD_MAX_OPS_PER_COMPOUND	200
+
 struct nfsd_genl_rqstp {
 	struct sockaddr		rq_daddr;
 	struct sockaddr		rq_saddr;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d4b48602b2b0..ee0570cbdd9e 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -903,6 +903,7 @@ struct nfsd4_compoundargs {
 	char *				tag;
 	u32				taglen;
 	u32				minorversion;
+	u32				client_opcnt;
 	u32				opcnt;
 	bool				splice_ok;
 	struct nfsd4_op			*ops;
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v2] svcrdma: Increase the server's default RPC/RDMA credit grant
  2025-10-02 14:00 [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" Chuck Lever
@ 2025-10-02 14:00 ` Chuck Lever
  2025-10-02 16:34 ` [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" tianshuo han
  1 sibling, 0 replies; 3+ messages in thread
From: Chuck Lever @ 2025-10-02 14:00 UTC (permalink / raw)
  To: NeilBrown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey
  Cc: linux-nfs, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

The range of commits from commit e3274026e2ec ("SUNRPC: move all of
xprt handling into svc_xprt_handle()") to commit 15d39883ee7d
("SUNRPC: change the back-channel queue to lwq") enabled NFSD
performance to scale better as the number of nfsd threads is
increased. These commits were merged in v6.7.

Now that the nfsd thread count can scale to more threads, permit
individual clients to make more use of those threads. Increase the
RPC/RDMA per-connection credit grant from 64 to 128 -- same as the
Linux NFS client.

Simple single client fio-based benchmarking so far shows only
improvement, no regression.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 include/linux/sunrpc/svc_rdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Changes since v1:
* Patch description updates

diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 22704c2e5b9b..57f4fd94166a 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -131,7 +131,7 @@ static inline struct svcxprt_rdma *svc_rdma_rqst_rdma(struct svc_rqst *rqstp)
  */
 enum {
 	RPCRDMA_LISTEN_BACKLOG	= 10,
-	RPCRDMA_MAX_REQUESTS	= 64,
+	RPCRDMA_MAX_REQUESTS	= 128,
 	RPCRDMA_MAX_BC_REQUESTS	= 2,
 };
 
-- 
2.51.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND"
  2025-10-02 14:00 [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" Chuck Lever
  2025-10-02 14:00 ` [PATCH v2] svcrdma: Increase the server's default RPC/RDMA credit grant Chuck Lever
@ 2025-10-02 16:34 ` tianshuo han
  1 sibling, 0 replies; 3+ messages in thread
From: tianshuo han @ 2025-10-02 16:34 UTC (permalink / raw)
  To: Chuck Lever
  Cc: NeilBrown, Jeff Layton, Olga Kornievskaia, Dai Ngo, Tom Talpey,
	linux-nfs, Chuck Lever

Tested-by: Tianshuo Han <hantianshuo233@gmail.com>

I have tested this patch and can confirm it resolves the vulnerability
I reported. Thank you very much for including the attribution and for
addressing this issue so promptly!

Best regards,
Tianshuo

On Thu, Oct 2, 2025 at 10:00 PM Chuck Lever <cel@kernel.org> wrote:
>
> From: Chuck Lever <chuck.lever@oracle.com>
>
> I've found that pynfs COMP6 now leaves the connection or lease in a
> strange state, which causes CLOSE9 to hang indefinitely. I've dug
> into it a little, but I haven't been able to root-cause it yet.
> However, I bisected to commit 48aab1606fa8 ("NFSD: Remove the cap on
> number of operations per NFSv4 COMPOUND").
>
> Tianshuo Han also reports a potential vulnerability when decoding
> an NFSv4 COMPOUND. An attacker can place an arbitrarily large op
> count in the COMPOUND header, which results in:
>
> [   51.410584] nfsd: vmalloc error: size 1209533382144, exceeds total
> pages, mode:0xdc0(GFP_KERNEL|__GFP_ZERO),
> nodemask=(null),cpuset=/,mems_allowed=0
>
> when NFSD attempts to allocate the COMPOUND op array.
>
> Let's restore the per operation-per-COMPOUND limit, but increased to
> 200 for now.
>
> Reported-by: tianshuo han <hantianshuo233@gmail.com>
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> X-Cc: stable@vger.kernel.org
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c  | 14 ++++++++++++--
>  fs/nfsd/nfs4state.c |  1 +
>  fs/nfsd/nfs4xdr.c   |  4 +++-
>  fs/nfsd/nfsd.h      |  3 +++
>  fs/nfsd/xdr4.h      |  1 +
>  5 files changed, 20 insertions(+), 3 deletions(-)
>
> Changes since v1:
> * Patch description updates
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index f9aeefc0da73..7f7e6bb23a90 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2893,10 +2893,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
>
>         rqstp->rq_lease_breaker = (void **)&cstate->clp;
>
> -       trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt);
> +       trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
>         while (!status && resp->opcnt < args->opcnt) {
>                 op = &args->ops[resp->opcnt++];
>
> +               if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
> +                       /* If there are still more operations to process,
> +                        * stop here and report NFS4ERR_RESOURCE. */
> +                       if (cstate->minorversion == 0 &&
> +                           args->client_opcnt > resp->opcnt) {
> +                               op->status = nfserr_resource;
> +                               goto encode_op;
> +                       }
> +               }
> +
>                 /*
>                  * The XDR decode routines may have pre-set op->status;
>                  * for example, if there is a miscellaneous XDR error
> @@ -2973,7 +2983,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
>                         status = op->status;
>                 }
>
> -               trace_nfsd_compound_status(args->opcnt, resp->opcnt,
> +               trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
>                                            status, nfsd4_op_name(op->opnum));
>
>                 nfsd4_cstate_clear_replay(cstate);
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index ad2c45658c46..c9053ef4d79f 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3902,6 +3902,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
>         ca->headerpadsz = 0;
>         ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
>         ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
> +       ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
>         ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
>                         NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
>         ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 6a56dca6fb04..230bf53e39f7 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2488,8 +2488,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
>
>         if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
>                 return false;
> -       if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
> +       if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
>                 return false;
> +       argp->opcnt = min_t(u32, argp->client_opcnt,
> +                           NFSD_MAX_OPS_PER_COMPOUND);
>
>         if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
>                 argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 6812cd231b1d..8ffed4f0b95f 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -57,6 +57,9 @@ struct readdir_cd {
>         __be32                  err;    /* 0, nfserr, or nfserr_eof */
>  };
>
> +/* Maximum number of operations per session compound */
> +#define NFSD_MAX_OPS_PER_COMPOUND      200
> +
>  struct nfsd_genl_rqstp {
>         struct sockaddr         rq_daddr;
>         struct sockaddr         rq_saddr;
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index d4b48602b2b0..ee0570cbdd9e 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -903,6 +903,7 @@ struct nfsd4_compoundargs {
>         char *                          tag;
>         u32                             taglen;
>         u32                             minorversion;
> +       u32                             client_opcnt;
>         u32                             opcnt;
>         bool                            splice_ok;
>         struct nfsd4_op                 *ops;
> --
> 2.51.0
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2025-10-02 16:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-10-02 14:00 [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" Chuck Lever
2025-10-02 14:00 ` [PATCH v2] svcrdma: Increase the server's default RPC/RDMA credit grant Chuck Lever
2025-10-02 16:34 ` [PATCH v2] Revert "NFSD: Remove the cap on number of operations per NFSv4 COMPOUND" tianshuo han

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).