[RFC PATCH 0/7] Possible NFSD COPY clean-ups

Linux NFS development
 help / color / mirror / Atom feed

* [RFC PATCH 0/7] Possible NFSD COPY clean-ups
@ 2024-08-28 17:40 cel
  2024-08-28 17:40 ` [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier cel
                   ` (8 more replies)
  0 siblings, 9 replies; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

While working on OFFLOAD_STATUS and other potential improvements to
Linux NFS's COPY offload implementation, I've come up with a few
server-side observability enhancements and one or two possible bug
fixes. These are candidates to merge for v6.12.

Comments welcome.

Chuck Lever (7):
  NFSD: Async COPY result needs to return a write verifier
  NFSD: Limit the number of concurrent async COPY operations
  NFSD: Display copy stateids with conventional print formatting
  NFSD: Record the callback stateid in copy tracepoints
  NFSD: Clean up extra whitespace in trace_nfsd_copy_done
  NFSD: Document callback stateid laundering
  NFSD: Wrap async copy operations with trace points

 fs/nfsd/netns.h     |  1 +
 fs/nfsd/nfs4proc.c  | 38 +++++++++---------
 fs/nfsd/nfs4state.c | 37 ++++++++++++-----
 fs/nfsd/trace.h     | 97 +++++++++++++++++++++++++++++++++++++++++----
 fs/nfsd/xdr4.h      |  1 +
 5 files changed, 137 insertions(+), 37 deletions(-)

-- 
2.46.0


^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
@ 2024-08-28 17:40 ` cel
  2024-08-29 11:38   ` Jeff Layton
  2024-08-28 17:40 ` [RFC PATCH 2/7] NFSD: Limit the number of concurrent async COPY operations cel
                   ` (7 subsequent siblings)
  8 siblings, 1 reply; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

Currently, when NFSD handles an asynchronous COPY, it returns a
zero write verifier, relying on the subsequent CB_OFFLOAD callback
to pass the write verifier and a stable_how4 value to the client.

However, if the CB_OFFLOAD never arrives at the client (for example,
if a network partition occurs just as the server sends the
CB_OFFLOAD operation), the client will never receive this verifier.
Thus, if the client sends a follow-up COMMIT, there is no way for
the client to assess the COMMIT result.

The usual recovery for a missing CB_OFFLOAD is for the client to
send an OFFLOAD_STATUS operation, but that operation does not carry
a write verifier in its result. Neither does it carry a stable_how4
value, so the client /must/ send a COMMIT in this case -- which will
always fail because currently there's still no write verifier in the
COPY result.

Thus the server needs to return a normal write verifier in its COPY
result even if the COPY operation is to be performed asynchronously.

If the server recognizes the callback stateid in subsequent
OFFLOAD_STATUS operations, then obviously it has not restarted, and
the write verifier the client received in the COPY result is still
valid and can be used to assess a COMMIT of the copied data, if one
is needed.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 2e39cf2e502a..60c526adc27c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -751,15 +751,6 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			   &access->ac_supported);
 }
 
-static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
-{
-	__be32 *verf = (__be32 *)verifier->data;
-
-	BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
-
-	nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
-}
-
 static __be32
 nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	     union nfsd4_op_u *u)
@@ -1630,7 +1621,6 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
 		test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ?
 			NFS_FILE_SYNC : NFS_UNSTABLE;
 	nfsd4_copy_set_sync(copy, sync);
-	gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
 }
 
 static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
@@ -1803,9 +1793,11 @@ static __be32
 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		union nfsd4_op_u *u)
 {
-	struct nfsd4_copy *copy = &u->copy;
-	__be32 status;
+	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 	struct nfsd4_copy *async_copy = NULL;
+	struct nfsd4_copy *copy = &u->copy;
+	struct nfsd42_write_res *result;
+	__be32 status;
 
 	/*
 	 * Currently, async COPY is not reliable. Force all COPY
@@ -1814,6 +1806,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 */
 	nfsd4_copy_set_sync(copy, true);
 
+	result = &copy->cp_res;
+	nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn);
+
 	copy->cp_clp = cstate->clp;
 	if (nfsd4_ssc_is_inter(copy)) {
 		trace_nfsd_copy_inter(copy);
@@ -1838,8 +1833,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
 		sizeof(struct knfsd_fh));
 	if (nfsd4_copy_is_async(copy)) {
-		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
-
 		status = nfserrno(-ENOMEM);
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy)
@@ -1851,8 +1844,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out_err;
 		if (!nfs4_init_copy_state(nn, copy))
 			goto out_err;
-		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
-			sizeof(copy->cp_res.cb_stateid));
+		memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
+			sizeof(result->cb_stateid));
 		dup_copy_fields(copy, async_copy);
 		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
 				async_copy, "%s", "copy thread");
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC PATCH 2/7] NFSD: Limit the number of concurrent async COPY operations
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
  2024-08-28 17:40 ` [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier cel
@ 2024-08-28 17:40 ` cel
  2024-08-29 11:45   ` Jeff Layton
  2024-08-28 17:40 ` [RFC PATCH 3/7] NFSD: Display copy stateids with conventional print formatting cel
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever, stable

From: Chuck Lever <chuck.lever@oracle.com>

Nothing appears to limit the number of concurrent async COPY
operations that clients can start. In addition, AFAICT each async
COPY can copy an unlimited number of 4MB chunks, so can run for a
long time. Thus IMO async COPY can become a DoS vector.

Add a restriction mechanism that bounds the number of concurrent
background COPY operations. Start simple and try to be fair -- this
patch implements a per-namespace limit.

An async COPY request that occurs while this limit is exceeded gets
NFS4ERR_DELAY. The requesting client can choose to send the request
again after a delay or fall back to a traditional read/write style
copy.

If there is need to make the mechanism more sophisticated, we can
visit that in future patches.

Cc: stable@vger.kernel.org
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/netns.h     |  1 +
 fs/nfsd/nfs4proc.c  | 12 ++++++++++--
 fs/nfsd/nfs4state.c |  1 +
 fs/nfsd/xdr4.h      |  1 +
 4 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 14ec15656320..5cae26917436 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -148,6 +148,7 @@ struct nfsd_net {
 	u32		s2s_cp_cl_id;
 	struct idr	s2s_cp_stateids;
 	spinlock_t	s2s_cp_lock;
+	atomic_t	pending_async_copies;
 
 	/*
 	 * Version information
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 60c526adc27c..27f7eceb3b00 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1279,6 +1279,7 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
 {
 	if (!refcount_dec_and_test(&copy->refcount))
 		return;
+	atomic_dec(&copy->cp_nn->pending_async_copies);
 	kfree(copy->cp_src);
 	kfree(copy);
 }
@@ -1833,10 +1834,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
 		sizeof(struct knfsd_fh));
 	if (nfsd4_copy_is_async(copy)) {
-		status = nfserrno(-ENOMEM);
+		/* Arbitrary cap on number of pending async copy operations */
+		int nrthreads = atomic_read(&rqstp->rq_pool->sp_nrthreads);
+
 		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
 		if (!async_copy)
 			goto out_err;
+		async_copy->cp_nn = nn;
+		if (atomic_inc_return(&nn->pending_async_copies) > nrthreads) {
+			atomic_dec(&nn->pending_async_copies);
+			goto out_err;
+		}
 		INIT_LIST_HEAD(&async_copy->copies);
 		refcount_set(&async_copy->refcount, 1);
 		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
@@ -1876,7 +1884,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	}
 	if (async_copy)
 		cleanup_async_copy(async_copy);
-	status = nfserrno(-ENOMEM);
+	status = nfserr_jukebox;
 	goto out;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a20c2c9d7d45..aaebc60cc77c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -8554,6 +8554,7 @@ static int nfs4_state_create_net(struct net *net)
 	spin_lock_init(&nn->client_lock);
 	spin_lock_init(&nn->s2s_cp_lock);
 	idr_init(&nn->s2s_cp_stateids);
+	atomic_set(&nn->pending_async_copies, 0);
 
 	spin_lock_init(&nn->blocked_locks_lock);
 	INIT_LIST_HEAD(&nn->blocked_locks_lru);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index fbdd42cde1fa..2a21a7662e03 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -713,6 +713,7 @@ struct nfsd4_copy {
 	struct nfsd4_ssc_umount_item *ss_nsui;
 	struct nfs_fh		c_fh;
 	nfs4_stateid		stateid;
+	struct nfsd_net		*cp_nn;
 };
 
 static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync)
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC PATCH 3/7] NFSD: Display copy stateids with conventional print formatting
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
  2024-08-28 17:40 ` [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier cel
  2024-08-28 17:40 ` [RFC PATCH 2/7] NFSD: Limit the number of concurrent async COPY operations cel
@ 2024-08-28 17:40 ` cel
  2024-08-28 17:40 ` [RFC PATCH 4/7] NFSD: Record the callback stateid in copy tracepoints cel
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

Make it easier to grep for s2s COPY stateids in trace logs: Use the
same display format in nfsd_copy_class as is used to display other
stateids.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/trace.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 77bbd23aa150..e61109d97b4e 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -2153,14 +2153,14 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
 				sizeof(struct sockaddr_in6));
 	),
 	TP_printk("client=%pISpc intra=%d async=%d "
-		"src_stateid[si_generation:0x%x cl_boot:0x%x cl_id:0x%x so_id:0x%x] "
-		"dst_stateid[si_generation:0x%x cl_boot:0x%x cl_id:0x%x so_id:0x%x] "
+		"src_client %08x:%08x src_stateid %08x:%08x "
+		"dst_client %08x:%08x dst_stateid %08x:%08x "
 		"cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
 		__get_sockaddr(addr), __entry->intra, __entry->async,
-		__entry->src_si_generation, __entry->src_cl_boot,
-		__entry->src_cl_id, __entry->src_so_id,
-		__entry->dst_si_generation, __entry->dst_cl_boot,
-		__entry->dst_cl_id, __entry->dst_so_id,
+		__entry->src_cl_boot, __entry->src_cl_id,
+		__entry->src_so_id, __entry->src_si_generation,
+		__entry->dst_cl_boot, __entry->dst_cl_id,
+		__entry->dst_so_id, __entry->dst_si_generation,
 		__entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
 	)
 );
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC PATCH 4/7] NFSD: Record the callback stateid in copy tracepoints
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
                   ` (2 preceding siblings ...)
  2024-08-28 17:40 ` [RFC PATCH 3/7] NFSD: Display copy stateids with conventional print formatting cel
@ 2024-08-28 17:40 ` cel
  2024-08-28 17:40 ` [RFC PATCH 5/7] NFSD: Clean up extra whitespace in trace_nfsd_copy_done cel
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

Match COPY operations up with CB_OFFLOAD operations.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/trace.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index e61109d97b4e..61ca9632021d 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -2127,6 +2127,10 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
 		__field(u32, dst_cl_id)
 		__field(u32, dst_so_id)
 		__field(u32, dst_si_generation)
+		__field(u32, cb_cl_boot)
+		__field(u32, cb_cl_id)
+		__field(u32, cb_so_id)
+		__field(u32, cb_si_generation)
 		__field(u64, src_cp_pos)
 		__field(u64, dst_cp_pos)
 		__field(u64, cp_count)
@@ -2135,6 +2139,7 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
 	TP_fast_assign(
 		const stateid_t *src_stp = &copy->cp_src_stateid;
 		const stateid_t *dst_stp = &copy->cp_dst_stateid;
+		const stateid_t *cb_stp = &copy->cp_res.cb_stateid;
 
 		__entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
 		__entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
@@ -2146,6 +2151,10 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
 		__entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id;
 		__entry->dst_so_id = dst_stp->si_opaque.so_id;
 		__entry->dst_si_generation = dst_stp->si_generation;
+		__entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot;
+		__entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id;
+		__entry->cb_so_id = cb_stp->si_opaque.so_id;
+		__entry->cb_si_generation = cb_stp->si_generation;
 		__entry->src_cp_pos = copy->cp_src_pos;
 		__entry->dst_cp_pos = copy->cp_dst_pos;
 		__entry->cp_count = copy->cp_count;
@@ -2155,12 +2164,15 @@ DECLARE_EVENT_CLASS(nfsd_copy_class,
 	TP_printk("client=%pISpc intra=%d async=%d "
 		"src_client %08x:%08x src_stateid %08x:%08x "
 		"dst_client %08x:%08x dst_stateid %08x:%08x "
+		"cb_client %08x:%08x cb_stateid %08x:%08x "
 		"cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
 		__get_sockaddr(addr), __entry->intra, __entry->async,
 		__entry->src_cl_boot, __entry->src_cl_id,
 		__entry->src_so_id, __entry->src_si_generation,
 		__entry->dst_cl_boot, __entry->dst_cl_id,
 		__entry->dst_so_id, __entry->dst_si_generation,
+		__entry->cb_cl_boot, __entry->cb_cl_id,
+		__entry->cb_so_id, __entry->cb_si_generation,
 		__entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
 	)
 );
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC PATCH 5/7] NFSD: Clean up extra whitespace in trace_nfsd_copy_done
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
                   ` (3 preceding siblings ...)
  2024-08-28 17:40 ` [RFC PATCH 4/7] NFSD: Record the callback stateid in copy tracepoints cel
@ 2024-08-28 17:40 ` cel
  2024-08-28 17:40 ` [RFC PATCH 6/7] NFSD: Document callback stateid laundering cel
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/trace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 61ca9632021d..d3bbd58b44de 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -2205,7 +2205,7 @@ TRACE_EVENT(nfsd_copy_done,
 		__assign_sockaddr(addr, &copy->cp_clp->cl_addr,
 				sizeof(struct sockaddr_in6));
 	),
-	TP_printk("addr=%pISpc status=%d intra=%d async=%d ",
+	TP_printk("addr=%pISpc status=%d intra=%d async=%d",
 		__get_sockaddr(addr), __entry->status, __entry->intra, __entry->async
 	)
 );
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC PATCH 6/7] NFSD: Document callback stateid laundering
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
                   ` (4 preceding siblings ...)
  2024-08-28 17:40 ` [RFC PATCH 5/7] NFSD: Clean up extra whitespace in trace_nfsd_copy_done cel
@ 2024-08-28 17:40 ` cel
  2024-08-28 22:49   ` Olga Kornievskaia
  2024-08-28 17:40 ` [RFC PATCH 7/7] NFSD: Wrap async copy operations with trace points cel
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

NFSD removes COPY callback stateids after one lease period. This
practice keeps the list of callback stateids limited to prevent a
DoS possibility, but doesn't comply with the spirit of RFC 7862
Section 4.8, which says:

> A copy offload stateid will be valid until either (A) the client or
> server restarts or (B) the client returns the resource by issuing an
> OFFLOAD_CANCEL operation or the client replies to a CB_OFFLOAD
> operation.

Note there are no BCP 14 compliance keywords in this text, so NFSD
is free to ignore this stateid lifetime guideline without becoming
non-compliant.

Nevertheless, this behavior variance should be explicitly documented
in the code.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index aaebc60cc77c..437b94beb115 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -6324,6 +6324,29 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
 }
 #endif
 
+/*
+ * RFC 7862 Section 4.8 says that, if the client hasn't replied to a
+ * CB_OFFLOAD, that COPY callback stateid will live until the client or
+ * server restarts. To prevent a DoS resulting from a pile of these
+ * stateids accruing over time, NFSD purges them after one lease period.
+ */
+static void nfs4_launder_cpntf_statelist(struct nfsd_net *nn,
+					 struct laundry_time *lt)
+{
+	struct nfs4_cpntf_state *cps;
+	copy_stateid_t *cps_t;
+	int i;
+
+	spin_lock(&nn->s2s_cp_lock);
+	idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
+		cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
+		if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
+				state_expired(lt, cps->cpntf_time))
+			_free_cpntf_state_locked(nn, cps);
+	}
+	spin_unlock(&nn->s2s_cp_lock);
+}
+
 /* Check if any lock belonging to this lockowner has any blockers */
 static bool
 nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
@@ -6495,9 +6518,6 @@ nfs4_laundromat(struct nfsd_net *nn)
 		.cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease,
 		.new_timeo = nn->nfsd4_lease
 	};
-	struct nfs4_cpntf_state *cps;
-	copy_stateid_t *cps_t;
-	int i;
 
 	if (clients_still_reclaiming(nn)) {
 		lt.new_timeo = 0;
@@ -6505,14 +6525,8 @@ nfs4_laundromat(struct nfsd_net *nn)
 	}
 	nfsd4_end_grace(nn);
 
-	spin_lock(&nn->s2s_cp_lock);
-	idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
-		cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
-		if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
-				state_expired(&lt, cps->cpntf_time))
-			_free_cpntf_state_locked(nn, cps);
-	}
-	spin_unlock(&nn->s2s_cp_lock);
+	nfs4_launder_cpntf_statelist(nn, &lt);
+
 	nfs4_get_client_reaplist(nn, &reaplist, &lt);
 	nfs4_process_client_reaplist(&reaplist);
 
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC PATCH 7/7] NFSD: Wrap async copy operations with trace points
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
                   ` (5 preceding siblings ...)
  2024-08-28 17:40 ` [RFC PATCH 6/7] NFSD: Document callback stateid laundering cel
@ 2024-08-28 17:40 ` cel
  2024-08-29 12:48 ` [RFC PATCH 0/7] Possible NFSD COPY clean-ups Jeff Layton
  2024-08-30 19:31 ` Chuck Lever III
  8 siblings, 0 replies; 14+ messages in thread
From: cel @ 2024-08-28 17:40 UTC (permalink / raw)
  To: linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

From: Chuck Lever <chuck.lever@oracle.com>

Add an nfsd_copy_async_done tracepoint to record the timestamp, the
final status code, and the callback stateid of an async copy.

Rename the nfsd_copy_do_async tracepoint to match that naming
convention to make it easier to enable both of these with a
single glob.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c |  3 +-
 fs/nfsd/trace.h    | 71 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 27f7eceb3b00..8b2074c72206 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1758,7 +1758,7 @@ static int nfsd4_do_async_copy(void *data)
 {
 	struct nfsd4_copy *copy = (struct nfsd4_copy *)data;
 
-	trace_nfsd_copy_do_async(copy);
+	trace_nfsd_copy_async(copy);
 	if (nfsd4_ssc_is_inter(copy)) {
 		struct file *filp;
 
@@ -1785,6 +1785,7 @@ static int nfsd4_do_async_copy(void *data)
 
 do_callback:
 	set_bit(NFSD4_COPY_F_COMPLETED, &copy->cp_flags);
+	trace_nfsd_copy_async_done(copy);
 	nfsd4_send_cb_offload(copy);
 	cleanup_async_copy(copy);
 	return 0;
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index d3bbd58b44de..5febacd479b4 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -2184,7 +2184,7 @@ DEFINE_EVENT(nfsd_copy_class, nfsd_copy_##name,	\
 
 DEFINE_COPY_EVENT(inter);
 DEFINE_COPY_EVENT(intra);
-DEFINE_COPY_EVENT(do_async);
+DEFINE_COPY_EVENT(async);
 
 TRACE_EVENT(nfsd_copy_done,
 	TP_PROTO(
@@ -2210,6 +2210,75 @@ TRACE_EVENT(nfsd_copy_done,
 	)
 );
 
+TRACE_EVENT(nfsd_copy_async_done,
+	TP_PROTO(
+		const struct nfsd4_copy *copy
+	),
+	TP_ARGS(copy),
+	TP_STRUCT__entry(
+		__field(int, status)
+		__field(bool, intra)
+		__field(bool, async)
+		__field(u32, src_cl_boot)
+		__field(u32, src_cl_id)
+		__field(u32, src_so_id)
+		__field(u32, src_si_generation)
+		__field(u32, dst_cl_boot)
+		__field(u32, dst_cl_id)
+		__field(u32, dst_so_id)
+		__field(u32, dst_si_generation)
+		__field(u32, cb_cl_boot)
+		__field(u32, cb_cl_id)
+		__field(u32, cb_so_id)
+		__field(u32, cb_si_generation)
+		__field(u64, src_cp_pos)
+		__field(u64, dst_cp_pos)
+		__field(u64, cp_count)
+		__sockaddr(addr, sizeof(struct sockaddr_in6))
+	),
+	TP_fast_assign(
+		const stateid_t *src_stp = &copy->cp_src_stateid;
+		const stateid_t *dst_stp = &copy->cp_dst_stateid;
+		const stateid_t *cb_stp = &copy->cp_res.cb_stateid;
+
+		__entry->status = be32_to_cpu(copy->nfserr);
+		__entry->intra = test_bit(NFSD4_COPY_F_INTRA, &copy->cp_flags);
+		__entry->async = !test_bit(NFSD4_COPY_F_SYNCHRONOUS, &copy->cp_flags);
+		__entry->src_cl_boot = src_stp->si_opaque.so_clid.cl_boot;
+		__entry->src_cl_id = src_stp->si_opaque.so_clid.cl_id;
+		__entry->src_so_id = src_stp->si_opaque.so_id;
+		__entry->src_si_generation = src_stp->si_generation;
+		__entry->dst_cl_boot = dst_stp->si_opaque.so_clid.cl_boot;
+		__entry->dst_cl_id = dst_stp->si_opaque.so_clid.cl_id;
+		__entry->dst_so_id = dst_stp->si_opaque.so_id;
+		__entry->dst_si_generation = dst_stp->si_generation;
+		__entry->cb_cl_boot = cb_stp->si_opaque.so_clid.cl_boot;
+		__entry->cb_cl_id = cb_stp->si_opaque.so_clid.cl_id;
+		__entry->cb_so_id = cb_stp->si_opaque.so_id;
+		__entry->cb_si_generation = cb_stp->si_generation;
+		__entry->src_cp_pos = copy->cp_src_pos;
+		__entry->dst_cp_pos = copy->cp_dst_pos;
+		__entry->cp_count = copy->cp_count;
+		__assign_sockaddr(addr, &copy->cp_clp->cl_addr,
+				sizeof(struct sockaddr_in6));
+	),
+	TP_printk("client=%pISpc status=%d intra=%d async=%d "
+		"src_client %08x:%08x src_stateid %08x:%08x "
+		"dst_client %08x:%08x dst_stateid %08x:%08x "
+		"cb_client %08x:%08x cb_stateid %08x:%08x "
+		"cp_src_pos=%llu cp_dst_pos=%llu cp_count=%llu",
+		__get_sockaddr(addr),
+		__entry->status, __entry->intra, __entry->async,
+		__entry->src_cl_boot, __entry->src_cl_id,
+		__entry->src_so_id, __entry->src_si_generation,
+		__entry->dst_cl_boot, __entry->dst_cl_id,
+		__entry->dst_so_id, __entry->dst_si_generation,
+		__entry->cb_cl_boot, __entry->cb_cl_id,
+		__entry->cb_so_id, __entry->cb_si_generation,
+		__entry->src_cp_pos, __entry->dst_cp_pos, __entry->cp_count
+	)
+);
+
 #endif /* _NFSD_TRACE_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
2.46.0


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 6/7] NFSD: Document callback stateid laundering
  2024-08-28 17:40 ` [RFC PATCH 6/7] NFSD: Document callback stateid laundering cel
@ 2024-08-28 22:49   ` Olga Kornievskaia
  2024-08-29 14:05     ` Chuck Lever III
  0 siblings, 1 reply; 14+ messages in thread
From: Olga Kornievskaia @ 2024-08-28 22:49 UTC (permalink / raw)
  To: cel; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Jeff Layton, Chuck Lever

On Wed, Aug 28, 2024 at 1:40 PM <cel@kernel.org> wrote:
>
> From: Chuck Lever <chuck.lever@oracle.com>
>
> NFSD removes COPY callback stateids after once lease period. This
> practice keeps the list of callback stateids limited to prevent a
> DoS possibility, but doesn't comply with the spirit of RFC 7862
> Section 4.8, which says:
>
> > A copy offload stateid will be valid until either (A) the client or
> > server restarts or (B) the client returns the resource by issuing an
> > OFFLOAD_CANCEL operation or the client replies to a CB_OFFLOAD
> > operation.
>
> Note there are no BCP 14 compliance keywords in this text, so NFSD
> is free to ignore this stateid lifetime guideline without becoming
> non-compliant.
>
> Nevertheless, this behavior variance should be explicitly documented
> in the code.
>
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++-----------
>  1 file changed, 25 insertions(+), 11 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index aaebc60cc77c..437b94beb115 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -6324,6 +6324,29 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
>  }
>  #endif
>
> +/*
> + * RFC 7862 Section 4.8 says that, if the client hasn't replied to a
> + * CB_OFFLOAD, that COPY callback stateid will live until the client or
> + * server restarts. To prevent a DoS resulting from a pile of these
> + * stateids accruing over time, NFSD purges them after one lease period.
> + */

I don't believe this is correct documentation for this piece of code.
There are two kinds of stateids in the copy offload world: one is
issued by the source server "cnr_stateid" in the response of the
COPY_NOTIFY and given to the client (to be given to the destination
server to use for the read), and those are the ones kept in the
knfsd's s2s_cp_stateids list and then cleaned up by the laundry thread
when copy isn't cleaned up on the source server. The text from the RFC
quoted here is for copy's callback stateids. In the current
implementation, we don't keep callback stateids around past when the
async copy is done. I agree that needs to be changed for
OFFLOAD_STATUS op and then we can add the wording of how long we are
keeping those.

> +static void nfs4_launder_cpntf_statelist(struct nfsd_net *nn,
> +                                        struct laundry_time *lt)
> +{
> +       struct nfs4_cpntf_state *cps;
> +       copy_stateid_t *cps_t;
> +       int i;
> +
> +       spin_lock(&nn->s2s_cp_lock);
> +       idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> +               cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
> +               if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
> +                               state_expired(lt, cps->cpntf_time))
> +                       _free_cpntf_state_locked(nn, cps);
> +       }
> +       spin_unlock(&nn->s2s_cp_lock);
> +}
> +
>  /* Check if any lock belonging to this lockowner has any blockers */
>  static bool
>  nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
> @@ -6495,9 +6518,6 @@ nfs4_laundromat(struct nfsd_net *nn)
>                 .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease,
>                 .new_timeo = nn->nfsd4_lease
>         };
> -       struct nfs4_cpntf_state *cps;
> -       copy_stateid_t *cps_t;
> -       int i;
>
>         if (clients_still_reclaiming(nn)) {
>                 lt.new_timeo = 0;
> @@ -6505,14 +6525,8 @@ nfs4_laundromat(struct nfsd_net *nn)
>         }
>         nfsd4_end_grace(nn);
>
> -       spin_lock(&nn->s2s_cp_lock);
> -       idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
> -               cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
> -               if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
> -                               state_expired(&lt, cps->cpntf_time))
> -                       _free_cpntf_state_locked(nn, cps);
> -       }
> -       spin_unlock(&nn->s2s_cp_lock);
> +       nfs4_launder_cpntf_statelist(nn, &lt);
> +
>         nfs4_get_client_reaplist(nn, &reaplist, &lt);
>         nfs4_process_client_reaplist(&reaplist);
>
> --
> 2.46.0
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier
  2024-08-28 17:40 ` [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier cel
@ 2024-08-29 11:38   ` Jeff Layton
  0 siblings, 0 replies; 14+ messages in thread
From: Jeff Layton @ 2024-08-29 11:38 UTC (permalink / raw)
  To: cel, linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever

On Wed, 2024-08-28 at 13:40 -0400, cel@kernel.org wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> Currently, when NFSD handles an asynchronous COPY, it returns a
> zero write verifier, relying on the subsequent CB_OFFLOAD callback
> to pass the write verifier and a stable_how4 value to the client.
> 
> However, if the CB_OFFLOAD never arrives at the client (for example,
> if a network partition occurs just as the server sends the
> CB_OFFLOAD operation), the client will never receive this verifier.
> Thus, if the client sends a follow-up COMMIT, there is no way for
> the client to assess the COMMIT result.
> 
> The usual recovery for a missing CB_OFFLOAD is for the client to
> send an OFFLOAD_STATUS operation, but that operation does not carry
> a write verifier in its result. Neither does it carry a stable_how4
> value, so the client /must/ send a COMMIT in this case -- which will
> always fail because currently there's still no write verifier in the
> COPY result.
> 
> Thus the server needs to return a normal write verifier in its COPY
> result even if the COPY operation is to be performed asynchronously.
> 
> If the server recognizes the callback stateid in subsequent
> OFFLOAD_STATUS operations, then obviously it has not restarted, and
> the write verifier the client received in the COPY result is still
> valid and can be used to assess a COMMIT of the copied data, if one
> is needed.
> 
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/nfs4proc.c | 25 +++++++++----------------
>  1 file changed, 9 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 2e39cf2e502a..60c526adc27c 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -751,15 +751,6 @@ nfsd4_access(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			   &access->ac_supported);
>  }
>  
> -static void gen_boot_verifier(nfs4_verifier *verifier, struct net *net)
> -{
> -	__be32 *verf = (__be32 *)verifier->data;
> -
> -	BUILD_BUG_ON(2*sizeof(*verf) != sizeof(verifier->data));
> -
> -	nfsd_copy_write_verifier(verf, net_generic(net, nfsd_net_id));
> -}
> -
>  static __be32
>  nfsd4_commit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	     union nfsd4_op_u *u)
> @@ -1630,7 +1621,6 @@ static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync)
>  		test_bit(NFSD4_COPY_F_COMMITTED, &copy->cp_flags) ?
>  			NFS_FILE_SYNC : NFS_UNSTABLE;
>  	nfsd4_copy_set_sync(copy, sync);
> -	gen_boot_verifier(&copy->cp_res.wr_verifier, copy->cp_clp->net);
>  }
>  
>  static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy,
> @@ -1803,9 +1793,11 @@ static __be32
>  nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		union nfsd4_op_u *u)
>  {
> -	struct nfsd4_copy *copy = &u->copy;
> -	__be32 status;
> +	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
>  	struct nfsd4_copy *async_copy = NULL;
> +	struct nfsd4_copy *copy = &u->copy;
> +	struct nfsd42_write_res *result;
> +	__be32 status;
>  
>  	/*
>  	 * Currently, async COPY is not reliable. Force all COPY
> @@ -1814,6 +1806,9 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 */
>  	nfsd4_copy_set_sync(copy, true);
>  
> +	result = &copy->cp_res;
> +	nfsd_copy_write_verifier((__be32 *)&result->wr_verifier.data, nn);
> +
>  	copy->cp_clp = cstate->clp;
>  	if (nfsd4_ssc_is_inter(copy)) {
>  		trace_nfsd_copy_inter(copy);
> @@ -1838,8 +1833,6 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
>  		sizeof(struct knfsd_fh));
>  	if (nfsd4_copy_is_async(copy)) {
> -		struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> -
>  		status = nfserrno(-ENOMEM);
>  		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>  		if (!async_copy)
> @@ -1851,8 +1844,8 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			goto out_err;
>  		if (!nfs4_init_copy_state(nn, copy))
>  			goto out_err;
> -		memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid.cs_stid,
> -			sizeof(copy->cp_res.cb_stateid));
> +		memcpy(&result->cb_stateid, &copy->cp_stateid.cs_stid,
> +			sizeof(result->cb_stateid));
>  		dup_copy_fields(copy, async_copy);
>  		async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
>  				async_copy, "%s", "copy thread");

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 2/7] NFSD: Limit the number of concurrent async COPY operations
  2024-08-28 17:40 ` [RFC PATCH 2/7] NFSD: Limit the number of concurrent async COPY operations cel
@ 2024-08-29 11:45   ` Jeff Layton
  0 siblings, 0 replies; 14+ messages in thread
From: Jeff Layton @ 2024-08-29 11:45 UTC (permalink / raw)
  To: cel, linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever, stable

On Wed, 2024-08-28 at 13:40 -0400, cel@kernel.org wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> Nothing appears to limit the number of concurrent async COPY
> operations that clients can start. In addition, AFAICT each async
> COPY can copy an unlimited number of 4MB chunks, so can run for a
> long time. Thus IMO async COPY can become a DoS vector.
> 
> Add a restriction mechanism that bounds the number of concurrent
> background COPY operations. Start simple and try to be fair -- this
> patch implements a per-namespace limit.
> 
> An async COPY request that occurs while this limit is exceeded gets
> NFS4ERR_DELAY. The requesting client can choose to send the request
> again after a delay or fall back to a traditional read/write style
> copy.
> 
> If there is need to make the mechanism more sophisticated, we can
> visit that in future patches.
> 
> Cc: stable@vger.kernel.org
> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
> ---
>  fs/nfsd/netns.h     |  1 +
>  fs/nfsd/nfs4proc.c  | 12 ++++++++++--
>  fs/nfsd/nfs4state.c |  1 +
>  fs/nfsd/xdr4.h      |  1 +
>  4 files changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
> index 14ec15656320..5cae26917436 100644
> --- a/fs/nfsd/netns.h
> +++ b/fs/nfsd/netns.h
> @@ -148,6 +148,7 @@ struct nfsd_net {
>  	u32		s2s_cp_cl_id;
>  	struct idr	s2s_cp_stateids;
>  	spinlock_t	s2s_cp_lock;
> +	atomic_t	pending_async_copies;
>  
>  	/*
>  	 * Version information
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 60c526adc27c..27f7eceb3b00 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1279,6 +1279,7 @@ static void nfs4_put_copy(struct nfsd4_copy *copy)
>  {
>  	if (!refcount_dec_and_test(&copy->refcount))
>  		return;
> +	atomic_dec(&copy->cp_nn->pending_async_copies);
>  	kfree(copy->cp_src);
>  	kfree(copy);
>  }
> @@ -1833,10 +1834,17 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	memcpy(&copy->fh, &cstate->current_fh.fh_handle,
>  		sizeof(struct knfsd_fh));
>  	if (nfsd4_copy_is_async(copy)) {
> -		status = nfserrno(-ENOMEM);
> +		/* Arbitrary cap on number of pending async copy operations */
> +		int nrthreads = atomic_read(&rqstp->rq_pool->sp_nrthreads);
> +
>  		async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>  		if (!async_copy)
>  			goto out_err;
> +		async_copy->cp_nn = nn;
> +		if (atomic_inc_return(&nn->pending_async_copies) > nrthreads) {
> +			atomic_dec(&nn->pending_async_copies);
> +			goto out_err;
> +		}
>  		INIT_LIST_HEAD(&async_copy->copies);
>  		refcount_set(&async_copy->refcount, 1);
>  		async_copy->cp_src = kmalloc(sizeof(*async_copy->cp_src), GFP_KERNEL);
> @@ -1876,7 +1884,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	}
>  	if (async_copy)
>  		cleanup_async_copy(async_copy);
> -	status = nfserrno(-ENOMEM);
> +	status = nfserr_jukebox;

ENOMEM gets translated to nfserr_jukebox anyway, so this doesn't change
anything (good).

>  	goto out;
>  }
>  
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index a20c2c9d7d45..aaebc60cc77c 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -8554,6 +8554,7 @@ static int nfs4_state_create_net(struct net *net)
>  	spin_lock_init(&nn->client_lock);
>  	spin_lock_init(&nn->s2s_cp_lock);
>  	idr_init(&nn->s2s_cp_stateids);
> +	atomic_set(&nn->pending_async_copies, 0);
>  
>  	spin_lock_init(&nn->blocked_locks_lock);
>  	INIT_LIST_HEAD(&nn->blocked_locks_lru);
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index fbdd42cde1fa..2a21a7662e03 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -713,6 +713,7 @@ struct nfsd4_copy {
>  	struct nfsd4_ssc_umount_item *ss_nsui;
>  	struct nfs_fh		c_fh;
>  	nfs4_stateid		stateid;
> +	struct nfsd_net		*cp_nn;
>  };
>  
>  static inline void nfsd4_copy_set_sync(struct nfsd4_copy *copy, bool sync)

A per-ns limit sounds like a reasonable place to start.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 0/7] Possible NFSD COPY clean-ups
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
                   ` (6 preceding siblings ...)
  2024-08-28 17:40 ` [RFC PATCH 7/7] NFSD: Wrap async copy operations with trace points cel
@ 2024-08-29 12:48 ` Jeff Layton
  2024-08-30 19:31 ` Chuck Lever III
  8 siblings, 0 replies; 14+ messages in thread
From: Jeff Layton @ 2024-08-29 12:48 UTC (permalink / raw)
  To: cel, linux-nfs; +Cc: Olga Kornievskaia, Dai Ngo, Chuck Lever

On Wed, 2024-08-28 at 13:40 -0400, cel@kernel.org wrote:
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> While working on OFFLOAD_STATUS and other potential improvements to
> Linux NFS's COPY offload implementation, I've come up with a few
> server-side observability enhancements and one or two possible bug
> fixes. These are candidates to merge for v6.12.
> 
> Comments welcome.
> 
> Chuck Lever (7):
>   NFSD: Async COPY result needs to return a write verifier
>   NFSD: Limit the number of concurrent async COPY operations
>   NFSD: Display copy stateids with conventional print formatting
>   NFSD: Record the callback stateid in copy tracepoints
>   NFSD: Clean up extra whitespace in trace_nfsd_copy_done
>   NFSD: Document callback stateid laundering
>   NFSD: Wrap async copy operations with trace points
> 
>  fs/nfsd/netns.h     |  1 +
>  fs/nfsd/nfs4proc.c  | 38 +++++++++---------
>  fs/nfsd/nfs4state.c | 37 ++++++++++++-----
>  fs/nfsd/trace.h     | 97 +++++++++++++++++++++++++++++++++++++++++----
>  fs/nfsd/xdr4.h      |  1 +
>  5 files changed, 137 insertions(+), 37 deletions(-)
> 

I sent some R-b's already but the rest of the series looks fine to me.

Reviewed-by: Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 6/7] NFSD: Document callback stateid laundering
  2024-08-28 22:49   ` Olga Kornievskaia
@ 2024-08-29 14:05     ` Chuck Lever III
  0 siblings, 0 replies; 14+ messages in thread
From: Chuck Lever III @ 2024-08-29 14:05 UTC (permalink / raw)
  To: Olga Kornievskaia
  Cc: Chuck Lever, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Jeff Layton



> On Aug 28, 2024, at 6:49 PM, Olga Kornievskaia <aglo@umich.edu> wrote:
> 
> On Wed, Aug 28, 2024 at 1:40 PM <cel@kernel.org> wrote:
>> 
>> From: Chuck Lever <chuck.lever@oracle.com>
>> 
>> NFSD removes COPY callback stateids after one lease period. This
>> practice keeps the list of callback stateids limited to prevent a
>> DoS possibility, but doesn't comply with the spirit of RFC 7862
>> Section 4.8, which says:
>> 
>>> A copy offload stateid will be valid until either (A) the client or
>>> server restarts or (B) the client returns the resource by issuing an
>>> OFFLOAD_CANCEL operation or the client replies to a CB_OFFLOAD
>>> operation.
>> 
>> Note there are no BCP 14 compliance keywords in this text, so NFSD
>> is free to ignore this stateid lifetime guideline without becoming
>> non-compliant.
>> 
>> Nevertheless, this behavior variance should be explicitly documented
>> in the code.
>> 
>> Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
>> ---
>> fs/nfsd/nfs4state.c | 36 +++++++++++++++++++++++++-----------
>> 1 file changed, 25 insertions(+), 11 deletions(-)
>> 
>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>> index aaebc60cc77c..437b94beb115 100644
>> --- a/fs/nfsd/nfs4state.c
>> +++ b/fs/nfsd/nfs4state.c
>> @@ -6324,6 +6324,29 @@ static void nfsd4_ssc_expire_umount(struct nfsd_net *nn)
>> }
>> #endif
>> 
>> +/*
>> + * RFC 7862 Section 4.8 says that, if the client hasn't replied to a
>> + * CB_OFFLOAD, that COPY callback stateid will live until the client or
>> + * server restarts. To prevent a DoS resulting from a pile of these
>> + * stateids accruing over time, NFSD purges them after one lease period.
>> + */
> 
> I don't believe this is correct documentation for this piece of code.
> There are two kinds of stateids in the copy offload world: one is
> issued by the source server "cnr_stateid" in the response of the
> COPY_NOTIFY and given to the client (to be given to the destination
> server to use for the read)  and those are the ones kept in the
> knfsd's s2s_cp_stateids list and then cleaned up by the laundry thread
> when copy isn't cleaned up on the source server.

Got it.

That detail was not included in what you mentioned to me
before ;-) Either I can drop this patch, or please suggest
a corrected text and I will replace the comment.


> The text from the RFC
> quoted here is for copy's callback stateids.

I'll look for something more relevant, if only for my own
edification.


> In the current
> implementation, we don't keep callback stateids around past when the
> async copy is done. I agree that needs to be changed for
> OFFLOAD_STATUS op and then we can add the wording of how long we are
> keeping those.

Yep, as we discussed yesterday.


>> +static void nfs4_launder_cpntf_statelist(struct nfsd_net *nn,
>> +                                        struct laundry_time *lt)
>> +{
>> +       struct nfs4_cpntf_state *cps;
>> +       copy_stateid_t *cps_t;
>> +       int i;
>> +
>> +       spin_lock(&nn->s2s_cp_lock);
>> +       idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
>> +               cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
>> +               if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
>> +                               state_expired(lt, cps->cpntf_time))
>> +                       _free_cpntf_state_locked(nn, cps);
>> +       }
>> +       spin_unlock(&nn->s2s_cp_lock);
>> +}
>> +
>> /* Check if any lock belonging to this lockowner has any blockers */
>> static bool
>> nfs4_lockowner_has_blockers(struct nfs4_lockowner *lo)
>> @@ -6495,9 +6518,6 @@ nfs4_laundromat(struct nfsd_net *nn)
>>                .cutoff = ktime_get_boottime_seconds() - nn->nfsd4_lease,
>>                .new_timeo = nn->nfsd4_lease
>>        };
>> -       struct nfs4_cpntf_state *cps;
>> -       copy_stateid_t *cps_t;
>> -       int i;
>> 
>>        if (clients_still_reclaiming(nn)) {
>>                lt.new_timeo = 0;
>> @@ -6505,14 +6525,8 @@ nfs4_laundromat(struct nfsd_net *nn)
>>        }
>>        nfsd4_end_grace(nn);
>> 
>> -       spin_lock(&nn->s2s_cp_lock);
>> -       idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
>> -               cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
>> -               if (cps->cp_stateid.cs_type == NFS4_COPYNOTIFY_STID &&
>> -                               state_expired(&lt, cps->cpntf_time))
>> -                       _free_cpntf_state_locked(nn, cps);
>> -       }
>> -       spin_unlock(&nn->s2s_cp_lock);
>> +       nfs4_launder_cpntf_statelist(nn, &lt);
>> +
>>        nfs4_get_client_reaplist(nn, &reaplist, &lt);
>>        nfs4_process_client_reaplist(&reaplist);
>> 
>> --
>> 2.46.0


--
Chuck Lever



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC PATCH 0/7] Possible NFSD COPY clean-ups
  2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
                   ` (7 preceding siblings ...)
  2024-08-29 12:48 ` [RFC PATCH 0/7] Possible NFSD COPY clean-ups Jeff Layton
@ 2024-08-30 19:31 ` Chuck Lever III
  8 siblings, 0 replies; 14+ messages in thread
From: Chuck Lever III @ 2024-08-30 19:31 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo, Jeff Layton



> On Aug 28, 2024, at 1:40 PM, cel@kernel.org wrote:
> 
> From: Chuck Lever <chuck.lever@oracle.com>
> 
> While working on OFFLOAD_STATUS and other potential improvements to
> Linux NFS's COPY offload implementation, I've come up with a few
> server-side observability enhancements and one or two possible bug
> fixes. These are candidates to merge for v6.12.
> 
> Comments welcome.
> 
> Chuck Lever (7):
>  NFSD: Async COPY result needs to return a write verifier
>  NFSD: Limit the number of concurrent async COPY operations
>  NFSD: Display copy stateids with conventional print formatting
>  NFSD: Record the callback stateid in copy tracepoints
>  NFSD: Clean up extra whitespace in trace_nfsd_copy_done
>  NFSD: Document callback stateid laundering
>  NFSD: Wrap async copy operations with trace points
> 
> fs/nfsd/netns.h     |  1 +
> fs/nfsd/nfs4proc.c  | 38 +++++++++---------
> fs/nfsd/nfs4state.c | 37 ++++++++++++-----
> fs/nfsd/trace.h     | 97 +++++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/xdr4.h      |  1 +
> 5 files changed, 137 insertions(+), 37 deletions(-)
> 
> -- 
> 2.46.0

I've applied all but "NFSD: Document callback stateid laundering"
to nfsd-next.

--
Chuck Lever



^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2024-08-30 19:31 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-08-28 17:40 [RFC PATCH 0/7] Possible NFSD COPY clean-ups cel
2024-08-28 17:40 ` [RFC PATCH 1/7] NFSD: Async COPY result needs to return a write verifier cel
2024-08-29 11:38   ` Jeff Layton
2024-08-28 17:40 ` [RFC PATCH 2/7] NFSD: Limit the number of concurrent async COPY operations cel
2024-08-29 11:45   ` Jeff Layton
2024-08-28 17:40 ` [RFC PATCH 3/7] NFSD: Display copy stateids with conventional print formatting cel
2024-08-28 17:40 ` [RFC PATCH 4/7] NFSD: Record the callback stateid in copy tracepoints cel
2024-08-28 17:40 ` [RFC PATCH 5/7] NFSD: Clean up extra whitespace in trace_nfsd_copy_done cel
2024-08-28 17:40 ` [RFC PATCH 6/7] NFSD: Document callback stateid laundering cel
2024-08-28 22:49   ` Olga Kornievskaia
2024-08-29 14:05     ` Chuck Lever III
2024-08-28 17:40 ` [RFC PATCH 7/7] NFSD: Wrap async copy operations with trace points cel
2024-08-29 12:48 ` [RFC PATCH 0/7] Possible NFSD COPY clean-ups Jeff Layton
2024-08-30 19:31 ` Chuck Lever III

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox