[PATCH 5/6] nfsd: add support for freeing unused session-DRC slots

public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19 19:25   ` Chuck Lever
  2024-11-19 19:48   ` Jeff Layton
  0 siblings, 2 replies; 33+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than we limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 8 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactive a slot we accept with the next seqid in
sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
 fs/nfsd/nfs4xdr.c   |  5 +--
 fs/nfsd/state.h     |  4 +++
 fs/nfsd/xdr4.h      |  2 --
 4 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb522165b376..0625b0aec6b8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
+			 GFP_ATOMIC);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
@@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * gently try to allocate another one.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
+		void *prev_slot;
 
 		/*
 		 * GFP_NOWAIT is a low-priority non-blocking allocation
@@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 * allocation.
 		 */
 		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
+		prev_slot = xa_load(&session->se_slots, s);
+		if (xa_is_value(prev_slot) && slot) {
+			slot->sl_seqid = xa_to_value(prev_slot);
+			slot->sl_flags |= NFSD4_SLOT_REUSED;
+		}
 		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
-						GFP_ATOMIC)))
+						GFP_ATOMIC))) {
 			session->se_fchannel.maxreqs += 1;
-		else
+			session->se_target_maxslots = session->se_fchannel.maxreqs;
+		} else {
 			kfree(slot);
+		}
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5c79494bd20b..b281a2198ff3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index a14a823670e9..ea6659d52be2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -268,7 +268,9 @@ struct nfsd4_slot {
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
+	u8	sl_generation;
 	char	sl_data[];
 };
 
@@ -350,6 +352,8 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u8			se_slot_gen;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2024-11-19 19:25   ` Chuck Lever
  2024-11-19 22:35     ` NeilBrown
  2024-11-19 19:48   ` Jeff Layton
  1 sibling, 1 reply; 33+ messages in thread
From: Chuck Lever @ 2024-11-19 19:25 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fb522165b376..0625b0aec6b8 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> +			 GFP_ATOMIC);

Again... ATOMIC is probably not what we want here, even if it is
only documentary.

And, I thought we determined that an unretired slot had a sequence
number that is reset. Why save the slot's seqid? If I'm missing
something, the comment here should be bolstered to explain it.


>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> @@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * gently try to allocate another one.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
> +		void *prev_slot;
>  
>  		/*
>  		 * GFP_NOWAIT is a low-priority non-blocking allocation
> @@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		 * allocation.
>  		 */
>  		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		prev_slot = xa_load(&session->se_slots, s);
> +		if (xa_is_value(prev_slot) && slot) {
> +			slot->sl_seqid = xa_to_value(prev_slot);
> +			slot->sl_flags |= NFSD4_SLOT_REUSED;
> +		}
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> -						GFP_ATOMIC)))
> +						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> -		else
> +			session->se_target_maxslots = session->se_fchannel.maxreqs;
> +		} else {
>  			kfree(slot);
> +		}
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 5c79494bd20b..b281a2198ff3 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index a14a823670e9..ea6659d52be2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -268,7 +268,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -350,6 +352,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  
> -- 
> 2.47.0
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
  2024-11-19 19:25   ` Chuck Lever
@ 2024-11-19 19:48   ` Jeff Layton
  1 sibling, 0 replies; 33+ messages in thread
From: Jeff Layton @ 2024-11-19 19:48 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 

Personally, I think that resetting to 1 is the only sane choice. After
shrinking the slot table, either side is free to forget the slot
information. When the slot is resurrected, we need to treat it as a new
slot. Expecting the server to remember all seqids, and their cached
replies for all slots ever used on a session seems like an open-ended
mandate.

That said, I'm ok with the server being accepting here, in case there
are client implementations that have done it the other way. Some clear
guidance from the RFCs would sure be nice though.

> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fb522165b376..0625b0aec6b8 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> +			 GFP_ATOMIC);
>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> @@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * gently try to allocate another one.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
> +		void *prev_slot;
>  
>  		/*
>  		 * GFP_NOWAIT is a low-priority non-blocking allocation
> @@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		 * allocation.
>  		 */
>  		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		prev_slot = xa_load(&session->se_slots, s);
> +		if (xa_is_value(prev_slot) && slot) {
> +			slot->sl_seqid = xa_to_value(prev_slot);
> +			slot->sl_flags |= NFSD4_SLOT_REUSED;
> +		}
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> -						GFP_ATOMIC)))
> +						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> -		else
> +			session->se_target_maxslots = session->se_fchannel.maxreqs;
> +		} else {
>  			kfree(slot);
> +		}
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 5c79494bd20b..b281a2198ff3 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index a14a823670e9..ea6659d52be2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -268,7 +268,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -350,6 +352,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19 19:25   ` Chuck Lever
@ 2024-11-19 22:35     ` NeilBrown
  2024-11-20  1:27       ` Chuck Lever
  0 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-11-19 22:35 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than we limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactive a slot we accept with the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> >  fs/nfsd/nfs4xdr.c   |  5 +--
> >  fs/nfsd/state.h     |  4 +++
> >  fs/nfsd/xdr4.h      |  2 --
> >  4 files changed, 76 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fb522165b376..0625b0aec6b8 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >  
> >  static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >  {
> >  	int i;
> >  
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >  
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > +			 GFP_ATOMIC);
> 
> Again... ATOMIC is probably not what we want here, even if it is
> only documentary.

Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
a warning.

> 
> And, I thought we determined that an unretired slot had a sequence
> number that is reset. Why save the slot's seqid? If I'm missing
> something, the comment here should be bolstered to explain it.

It isn't clear to me that we determined that - only the some people
asserted it.  Until the spec is clarified I think it is safest to be
cautious.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19 22:35     ` NeilBrown
@ 2024-11-20  1:27       ` Chuck Lever
  2024-11-21 21:47         ` NeilBrown
  0 siblings, 1 reply; 33+ messages in thread
From: Chuck Lever @ 2024-11-20  1:27 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> On Wed, 20 Nov 2024, Chuck Lever wrote:
> > On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > > Reducing the number of slots in the session slot table requires
> > > confirmation from the client.  This patch adds reduce_session_slots()
> > > which starts the process of getting confirmation, but never calls it.
> > > That will come in a later patch.
> > > 
> > > Before we can free a slot we need to confirm that the client won't try
> > > to use it again.  This involves returning a lower cr_maxrequests in a
> > > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > > is not larger than we limit we are trying to impose.  So for each slot
> > > we need to remember that we have sent a reduced cr_maxrequests.
> > > 
> > > To achieve this we introduce a concept of request "generations".  Each
> > > time we decide to reduce cr_maxrequests we increment the generation
> > > number, and record this when we return the lower cr_maxrequests to the
> > > client.  When a slot with the current generation reports a low
> > > ca_maxrequests, we commit to that level and free extra slots.
> > > 
> > > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > > we iterate all slots and reset the generation number to avoid false matches.
> > > 
> > > When we free a slot we store the seqid in the slot pointer so that it can
> > > be restored when we reactivate the slot.  The RFC can be read as
> > > suggesting that the slot number could restart from one after a slot is
> > > retired and reactivated, but also suggests that retiring slots is not
> > > required.  So when we reactive a slot we accept with the next seqid in
> > > sequence, or 1.
> > > 
> > > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > > matches how it is encoded for the reply.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> > >  fs/nfsd/nfs4xdr.c   |  5 +--
> > >  fs/nfsd/state.h     |  4 +++
> > >  fs/nfsd/xdr4.h      |  2 --
> > >  4 files changed, 76 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index fb522165b376..0625b0aec6b8 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> > >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> > >  
> > >  static void
> > > -free_session_slots(struct nfsd4_session *ses)
> > > +free_session_slots(struct nfsd4_session *ses, int from)
> > >  {
> > >  	int i;
> > >  
> > > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > > +	if (from >= ses->se_fchannel.maxreqs)
> > > +		return;
> > > +
> > > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> > >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > >  
> > > -		xa_erase(&ses->se_slots, i);
> > > +		/*
> > > +		 * Save the seqid in case we reactivate this slot.
> > > +		 * This will never require a memory allocation so GFP
> > > +		 * flag is irrelevant
> > > +		 */
> > > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > > +			 GFP_ATOMIC);
> > 
> > Again... ATOMIC is probably not what we want here, even if it is
> > only documentary.
> 
> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> a warning.

I find using GFP_ATOMIC here to be confusing -- it requests
allocation from special memory reserves and is to be used in
situations where allocation might result in system failure. That is
clearly not the case here, and the resulting memory allocation might
be long-lived.

I see the comment that says memory won't actually be allocated. I'm
not sure that's the way xa_store() works, however.

I don't immediately see another good choice, however. I can reach
out to Matthew and Liam and see if they have a better idea.


> > And, I thought we determined that an unretired slot had a sequence
> > number that is reset. Why save the slot's seqid? If I'm missing
> > something, the comment here should be bolstered to explain it.
> 
> It isn't clear to me that we determined that - only the some people
> asserted it.

From what I've read, everyone else who responded has said "use one".
And they have provided enough spec quotations that 1 seems like the
right initial slot sequence number value, always.

You should trust Tom Talpey's opinion on this. He was directly
involved 25 years ago when sessions were invented in DAFS and then
transferred into the NFSv4.1 protocol.


> Until the spec is clarified I think it is safest to be cautious.

The usual line we draw for adding code/features/complexity is the
proposer must demonstrate a use case for it. So far I have not seen
a client implementation that needs a server to remember the sequence
number in a slot that has been shrunken and then re-activated.

Will this dead slot be subject to being freed by the session
shrinker?

But the proposed implementation accepts 1 in this case, and it
doesn't seem tremendously difficult to remove the "remember the
seqid" mechanism once it has been codified to everyone's
satisfaction. So I won't belabor the point.


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-20  1:27       ` Chuck Lever
@ 2024-11-21 21:47         ` NeilBrown
  2024-11-21 22:29           ` Chuck Lever III
  0 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-11-21 21:47 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> > On Wed, 20 Nov 2024, Chuck Lever wrote:
> > > On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > > > Reducing the number of slots in the session slot table requires
> > > > confirmation from the client.  This patch adds reduce_session_slots()
> > > > which starts the process of getting confirmation, but never calls it.
> > > > That will come in a later patch.
> > > > 
> > > > Before we can free a slot we need to confirm that the client won't try
> > > > to use it again.  This involves returning a lower cr_maxrequests in a
> > > > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > > > is not larger than we limit we are trying to impose.  So for each slot
> > > > we need to remember that we have sent a reduced cr_maxrequests.
> > > > 
> > > > To achieve this we introduce a concept of request "generations".  Each
> > > > time we decide to reduce cr_maxrequests we increment the generation
> > > > number, and record this when we return the lower cr_maxrequests to the
> > > > client.  When a slot with the current generation reports a low
> > > > ca_maxrequests, we commit to that level and free extra slots.
> > > > 
> > > > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > > > we iterate all slots and reset the generation number to avoid false matches.
> > > > 
> > > > When we free a slot we store the seqid in the slot pointer so that it can
> > > > be restored when we reactivate the slot.  The RFC can be read as
> > > > suggesting that the slot number could restart from one after a slot is
> > > > retired and reactivated, but also suggests that retiring slots is not
> > > > required.  So when we reactive a slot we accept with the next seqid in
> > > > sequence, or 1.
> > > > 
> > > > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > > > matches how it is encoded for the reply.
> > > > 
> > > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > > ---
> > > >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> > > >  fs/nfsd/nfs4xdr.c   |  5 +--
> > > >  fs/nfsd/state.h     |  4 +++
> > > >  fs/nfsd/xdr4.h      |  2 --
> > > >  4 files changed, 76 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > index fb522165b376..0625b0aec6b8 100644
> > > > --- a/fs/nfsd/nfs4state.c
> > > > +++ b/fs/nfsd/nfs4state.c
> > > > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> > > >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> > > >  
> > > >  static void
> > > > -free_session_slots(struct nfsd4_session *ses)
> > > > +free_session_slots(struct nfsd4_session *ses, int from)
> > > >  {
> > > >  	int i;
> > > >  
> > > > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > > > +	if (from >= ses->se_fchannel.maxreqs)
> > > > +		return;
> > > > +
> > > > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> > > >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > > >  
> > > > -		xa_erase(&ses->se_slots, i);
> > > > +		/*
> > > > +		 * Save the seqid in case we reactivate this slot.
> > > > +		 * This will never require a memory allocation so GFP
> > > > +		 * flag is irrelevant
> > > > +		 */
> > > > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > > > +			 GFP_ATOMIC);
> > > 
> > > Again... ATOMIC is probably not what we want here, even if it is
> > > only documentary.
> > 
> > Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> > a warning.
> 
> I find using GFP_ATOMIC here to be confusing -- it requests
> allocation from special memory reserves and is to be used in
> situations where allocation might result in system failure. That is
> clearly not the case here, and the resulting memory allocation might
> be long-lived.

Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??

My understanding of how GFP_ATOMIC is used is it is what people choose
when they have to allocate in a no-sleep context.  It can fail and there
must always be a fall-back option.  In many cases GFP_NOWAIT could
possibly be used when it isn't a high priority, but there are 430 uses
for GFP_NOWAIT compared with over 5000 of GFP_ATOMIC.


> 
> I see the comment that says memory won't actually be allocated. I'm
> not sure that's the way xa_store() works, however.

xarray.rst says:

  The xa_store(), xa_cmpxchg(), xa_alloc(),
  xa_reserve() and xa_insert() functions take a gfp_t
  parameter in case the XArray needs to allocate memory to store this entry.
  If the entry is being deleted, no memory allocation needs to be performed,
  and the GFP flags specified will be ignored.`

The particular context is that a normal pointer is currently stored a
the given index, and we are replacing that with a number.  The above
doesn't explicitly say that won't require a memory allocation, but I
think it is reasonable to say it won't need "to allocate memory to store
this entry" - as an entry is already stored - so allocation should not
be needed.

> 
> I don't immediately see another good choice, however. I can reach
> out to Matthew and Liam and see if they have a better idea.
> 
> 
> > > And, I thought we determined that an unretired slot had a sequence
> > > number that is reset. Why save the slot's seqid? If I'm missing
> > > something, the comment here should be bolstered to explain it.
> > 
> > It isn't clear to me that we determined that - only the some people
> > asserted it.
> 
> From what I've read, everyone else who responded has said "use one".
> And they have provided enough spec quotations that 1 seems like the
> right initial slot sequence number value, always.
> 
> You should trust Tom Talpey's opinion on this. He was directly
> involved 25 years ago when sessions were invented in DAFS and then
> transferred into the NFSv4.1 protocol.

Dave Noveck (also deeply involved) say:

   It does.  The problem is that it undercuts the core goal of the
   slot-based approach 
   In that it makes it possible to have multiple requests with the same
   session id/ slot ID / sequence triple.

i.e.  resetting to 1 undercuts the core goal....  That is not a
resounding endorsement.

While I respect the people, I prefer to trust reasoned arguments.

What exactly is the sequence number protecting against?  It must be
protecting against a request being sent, not reply received, connection
closed, request sent on another connection, reply received.  Original
request getting to the server before the "close" message.  Server must
be sure not to handle this request.  sequence number provides that.

But what if the above all happens for request with seqno of 1, then the
server and client negotiate a "retiring" of slot 1 and then it's reuse
before the original request arrives.  How does the server know to ignore
it?

And Tom said that the handling of maxreq etc is "optional" for both
server and client.  So how can the server know if the client has retired
the slot when doing so is optional???

I really don't think we have clarity on this at all.

> 
> 
> > Until the spec is clarified I think it is safest to be cautious.
> 
> The usual line we draw for adding code/features/complexity is the
> proposer must demonstrate a use case for it. So far I have not seen
> a client implementation that needs a server to remember the sequence
> number in a slot that has been shrunken and then re-activated.

And I cannot in good faith submit code that I think violates the spec.

> 
> Will this dead slot be subject to being freed by the session
> shrinker?

No, but it uses much much less space than a slot.

> 
> But the proposed implementation accepts 1 in this case, and it
> doesn't seem tremendously difficult to remove the "remember the
> seqid" mechanism once it has been codified to everyone's
> satisfaction. So I won't belabor the point.

Thank you!

NeilBrown

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-21 21:47         ` NeilBrown
@ 2024-11-21 22:29           ` Chuck Lever III
  2024-12-02 16:11             ` Chuck Lever III
  0 siblings, 1 reply; 33+ messages in thread
From: Chuck Lever III @ 2024-11-21 22:29 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Wed, 20 Nov 2024, Chuck Lever wrote:
>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>> Reducing the number of slots in the session slot table requires
>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>> which starts the process of getting confirmation, but never calls it.
>>>>> That will come in a later patch.
>>>>> 
>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>> 
>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>> client.  When a slot with the current generation reports a low
>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>> 
>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>> 
>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>> suggesting that the slot number could restart from one after a slot is
>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>> sequence, or 1.
>>>>> 
>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>> matches how it is encoded for the reply.
>>>>> 
>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>> ---
>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>> fs/nfsd/state.h     |  4 +++
>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>> --- a/fs/nfsd/nfs4state.c
>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>> 
>>>>> static void
>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>> {
>>>>> int i;
>>>>> 
>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>> + return;
>>>>> +
>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>> 
>>>>> - xa_erase(&ses->se_slots, i);
>>>>> + /*
>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>> +  * This will never require a memory allocation so GFP
>>>>> +  * flag is irrelevant
>>>>> +  */
>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>> +  GFP_ATOMIC);
>>>> 
>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>> only documentary.
>>> 
>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>> a warning.
>> 
>> I find using GFP_ATOMIC here to be confusing -- it requests
>> allocation from special memory reserves and is to be used in
>> situations where allocation might result in system failure. That is
>> clearly not the case here, and the resulting memory allocation might
>> be long-lived.
> 
> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??

I will be comfortable when I hear back from Matthew and Liam.

:-)


>> I see the comment that says memory won't actually be allocated. I'm
>> not sure that's the way xa_store() works, however.
> 
> xarray.rst says:
> 
>  The xa_store(), xa_cmpxchg(), xa_alloc(),
>  xa_reserve() and xa_insert() functions take a gfp_t
>  parameter in case the XArray needs to allocate memory to store this entry.
>  If the entry is being deleted, no memory allocation needs to be performed,
>  and the GFP flags specified will be ignored.`
> 
> The particular context is that a normal pointer is currently stored a
> the given index, and we are replacing that with a number.  The above
> doesn't explicitly say that won't require a memory allocation, but I
> think it is reasonable to say it won't need "to allocate memory to store
> this entry" - as an entry is already stored - so allocation should not
> be needed.

xa_mk_value() converts a number to a pointer, and xa_store
stores that pointer.

I suspect that xa_store() is allowed to rebalance the
xarray's internal data structures, and that could result
in memory release or allocation. That's why a GFP flag is
one of the arguments.


--
Chuck Lever



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-21 22:29           ` Chuck Lever III
@ 2024-12-02 16:11             ` Chuck Lever III
  2024-12-03  4:28               ` NeilBrown
  0 siblings, 1 reply; 33+ messages in thread
From: Chuck Lever III @ 2024-12-02 16:11 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Nov 21, 2024, at 5:29 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> 
> 
> 
>> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
>> 
>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>>> Reducing the number of slots in the session slot table requires
>>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>>> which starts the process of getting confirmation, but never calls it.
>>>>>> That will come in a later patch.
>>>>>> 
>>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>>> 
>>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>>> client.  When a slot with the current generation reports a low
>>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>>> 
>>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>>> 
>>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>>> suggesting that the slot number could restart from one after a slot is
>>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>>> sequence, or 1.
>>>>>> 
>>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>>> matches how it is encoded for the reply.
>>>>>> 
>>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>>> ---
>>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>>> fs/nfsd/state.h     |  4 +++
>>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>>> 
>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>>> 
>>>>>> static void
>>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>>> {
>>>>>> int i;
>>>>>> 
>>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>>> + return;
>>>>>> +
>>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>>> 
>>>>>> - xa_erase(&ses->se_slots, i);
>>>>>> + /*
>>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>>> +  * This will never require a memory allocation so GFP
>>>>>> +  * flag is irrelevant
>>>>>> +  */
>>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>>> +  GFP_ATOMIC);
>>>>> 
>>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>>> only documentary.
>>>> 
>>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>>> a warning.
>>> 
>>> I find using GFP_ATOMIC here to be confusing -- it requests
>>> allocation from special memory reserves and is to be used in
>>> situations where allocation might result in system failure. That is
>>> clearly not the case here, and the resulting memory allocation might
>>> be long-lived.
>> 
>> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??
> 
> I will be comfortable when I hear back from Matthew and Liam.
> 
> :-)
> 
> 
>>> I see the comment that says memory won't actually be allocated. I'm
>>> not sure that's the way xa_store() works, however.
>> 
>> xarray.rst says:
>> 
>> The xa_store(), xa_cmpxchg(), xa_alloc(),
>> xa_reserve() and xa_insert() functions take a gfp_t
>> parameter in case the XArray needs to allocate memory to store this entry.
>> If the entry is being deleted, no memory allocation needs to be performed,
>> and the GFP flags specified will be ignored.`
>> 
>> The particular context is that a normal pointer is currently stored a
>> the given index, and we are replacing that with a number.  The above
>> doesn't explicitly say that won't require a memory allocation, but I
>> think it is reasonable to say it won't need "to allocate memory to store
>> this entry" - as an entry is already stored - so allocation should not
>> be needed.
> 
> xa_mk_value() converts a number to a pointer, and xa_store
> stores that pointer.
> 
> I suspect that xa_store() is allowed to rebalance the
> xarray's internal data structures, and that could result
> in memory release or allocation. That's why a GFP flag is
> one of the arguments.

Matthew says the xa_store() is guaranteed not to do a memory
allocation in this case. However, they prefer an annotation
of the call site with a "0" GFP argument to show that the
allocation flags are not relevant.

Does this:

	xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);

work for you?

--
Chuck Lever



^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-02 16:11             ` Chuck Lever III
@ 2024-12-03  4:28               ` NeilBrown
  2024-12-03 14:40                 ` Chuck Lever III
  0 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-12-03  4:28 UTC (permalink / raw)
  To: Chuck Lever III
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey

On Tue, 03 Dec 2024, Chuck Lever III wrote:
> 
> 
> > On Nov 21, 2024, at 5:29 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> > 
> > 
> > 
> >> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
> >> 
> >> On Wed, 20 Nov 2024, Chuck Lever wrote:
> >>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> >>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
> >>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> >>>>>> Reducing the number of slots in the session slot table requires
> >>>>>> confirmation from the client.  This patch adds reduce_session_slots()
> >>>>>> which starts the process of getting confirmation, but never calls it.
> >>>>>> That will come in a later patch.
> >>>>>> 
> >>>>>> Before we can free a slot we need to confirm that the client won't try
> >>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
> >>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> >>>>>> is not larger than we limit we are trying to impose.  So for each slot
> >>>>>> we need to remember that we have sent a reduced cr_maxrequests.
> >>>>>> 
> >>>>>> To achieve this we introduce a concept of request "generations".  Each
> >>>>>> time we decide to reduce cr_maxrequests we increment the generation
> >>>>>> number, and record this when we return the lower cr_maxrequests to the
> >>>>>> client.  When a slot with the current generation reports a low
> >>>>>> ca_maxrequests, we commit to that level and free extra slots.
> >>>>>> 
> >>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> >>>>>> we iterate all slots and reset the generation number to avoid false matches.
> >>>>>> 
> >>>>>> When we free a slot we store the seqid in the slot pointer so that it can
> >>>>>> be restored when we reactivate the slot.  The RFC can be read as
> >>>>>> suggesting that the slot number could restart from one after a slot is
> >>>>>> retired and reactivated, but also suggests that retiring slots is not
> >>>>>> required.  So when we reactive a slot we accept with the next seqid in
> >>>>>> sequence, or 1.
> >>>>>> 
> >>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> >>>>>> matches how it is encoded for the reply.
> >>>>>> 
> >>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
> >>>>>> ---
> >>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> >>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
> >>>>>> fs/nfsd/state.h     |  4 +++
> >>>>>> fs/nfsd/xdr4.h      |  2 --
> >>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
> >>>>>> 
> >>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >>>>>> index fb522165b376..0625b0aec6b8 100644
> >>>>>> --- a/fs/nfsd/nfs4state.c
> >>>>>> +++ b/fs/nfsd/nfs4state.c
> >>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> >>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >>>>>> 
> >>>>>> static void
> >>>>>> -free_session_slots(struct nfsd4_session *ses)
> >>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
> >>>>>> {
> >>>>>> int i;
> >>>>>> 
> >>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> >>>>>> + if (from >= ses->se_fchannel.maxreqs)
> >>>>>> + return;
> >>>>>> +
> >>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >>>>>> 
> >>>>>> - xa_erase(&ses->se_slots, i);
> >>>>>> + /*
> >>>>>> +  * Save the seqid in case we reactivate this slot.
> >>>>>> +  * This will never require a memory allocation so GFP
> >>>>>> +  * flag is irrelevant
> >>>>>> +  */
> >>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> >>>>>> +  GFP_ATOMIC);
> >>>>> 
> >>>>> Again... ATOMIC is probably not what we want here, even if it is
> >>>>> only documentary.
> >>>> 
> >>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> >>>> a warning.
> >>> 
> >>> I find using GFP_ATOMIC here to be confusing -- it requests
> >>> allocation from special memory reserves and is to be used in
> >>> situations where allocation might result in system failure. That is
> >>> clearly not the case here, and the resulting memory allocation might
> >>> be long-lived.
> >> 
> >> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??
> > 
> > I will be comfortable when I hear back from Matthew and Liam.
> > 
> > :-)
> > 
> > 
> >>> I see the comment that says memory won't actually be allocated. I'm
> >>> not sure that's the way xa_store() works, however.
> >> 
> >> xarray.rst says:
> >> 
> >> The xa_store(), xa_cmpxchg(), xa_alloc(),
> >> xa_reserve() and xa_insert() functions take a gfp_t
> >> parameter in case the XArray needs to allocate memory to store this entry.
> >> If the entry is being deleted, no memory allocation needs to be performed,
> >> and the GFP flags specified will be ignored.`
> >> 
> >> The particular context is that a normal pointer is currently stored a
> >> the given index, and we are replacing that with a number.  The above
> >> doesn't explicitly say that won't require a memory allocation, but I
> >> think it is reasonable to say it won't need "to allocate memory to store
> >> this entry" - as an entry is already stored - so allocation should not
> >> be needed.
> > 
> > xa_mk_value() converts a number to a pointer, and xa_store
> > stores that pointer.
> > 
> > I suspect that xa_store() is allowed to rebalance the
> > xarray's internal data structures, and that could result
> > in memory release or allocation. That's why a GFP flag is
> > one of the arguments.
> 
> Matthew says the xa_store() is guaranteed not to do a memory
> allocation in this case. However, they prefer an annotation
> of the call site with a "0" GFP argument to show that the
> allocation flags are not relevant.
> 
> Does this:
> 
> 	xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> 
> work for you?

Sure.
And it looks like sparse will be happy even though "0" isn't explicitly
"gfp_t" because 0 is "special".

I might prefer GFP_NULL or similar, but 0 certainly works for me.  I'll
include that when I resend.

Thanks
NeilBrown

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-03  4:28               ` NeilBrown
@ 2024-12-03 14:40                 ` Chuck Lever III
  0 siblings, 0 replies; 33+ messages in thread
From: Chuck Lever III @ 2024-12-03 14:40 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Dec 2, 2024, at 11:28 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Tue, 03 Dec 2024, Chuck Lever III wrote:
>> 
>> 
>>> On Nov 21, 2024, at 5:29 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
>>> 
>>> 
>>> 
>>>> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
>>>> 
>>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>>>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>>>>> Reducing the number of slots in the session slot table requires
>>>>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>>>>> which starts the process of getting confirmation, but never calls it.
>>>>>>>> That will come in a later patch.
>>>>>>>> 
>>>>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>>>>> 
>>>>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>>>>> client.  When a slot with the current generation reports a low
>>>>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>>>>> 
>>>>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>>>>> 
>>>>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>>>>> suggesting that the slot number could restart from one after a slot is
>>>>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>>>>> sequence, or 1.
>>>>>>>> 
>>>>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>>>>> matches how it is encoded for the reply.
>>>>>>>> 
>>>>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>>>>> ---
>>>>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>>>>> fs/nfsd/state.h     |  4 +++
>>>>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>>>>> 
>>>>>>>> static void
>>>>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>>>>> {
>>>>>>>> int i;
>>>>>>>> 
>>>>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>>>>> 
>>>>>>>> - xa_erase(&ses->se_slots, i);
>>>>>>>> + /*
>>>>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>>>>> +  * This will never require a memory allocation so GFP
>>>>>>>> +  * flag is irrelevant
>>>>>>>> +  */
>>>>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>>>>> +  GFP_ATOMIC);
>>>>>>> 
>>>>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>>>>> only documentary.
>>>>>> 
>>>>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>>>>> a warning.
>>>>> 
>>>>> I find using GFP_ATOMIC here to be confusing -- it requests
>>>>> allocation from special memory reserves and is to be used in
>>>>> situations where allocation might result in system failure. That is
>>>>> clearly not the case here, and the resulting memory allocation might
>>>>> be long-lived.
>>>> 
>>>> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??
>>> 
>>> I will be comfortable when I hear back from Matthew and Liam.
>>> 
>>> :-)
>>> 
>>> 
>>>>> I see the comment that says memory won't actually be allocated. I'm
>>>>> not sure that's the way xa_store() works, however.
>>>> 
>>>> xarray.rst says:
>>>> 
>>>> The xa_store(), xa_cmpxchg(), xa_alloc(),
>>>> xa_reserve() and xa_insert() functions take a gfp_t
>>>> parameter in case the XArray needs to allocate memory to store this entry.
>>>> If the entry is being deleted, no memory allocation needs to be performed,
>>>> and the GFP flags specified will be ignored.`
>>>> 
>>>> The particular context is that a normal pointer is currently stored a
>>>> the given index, and we are replacing that with a number.  The above
>>>> doesn't explicitly say that won't require a memory allocation, but I
>>>> think it is reasonable to say it won't need "to allocate memory to store
>>>> this entry" - as an entry is already stored - so allocation should not
>>>> be needed.
>>> 
>>> xa_mk_value() converts a number to a pointer, and xa_store
>>> stores that pointer.
>>> 
>>> I suspect that xa_store() is allowed to rebalance the
>>> xarray's internal data structures, and that could result
>>> in memory release or allocation. That's why a GFP flag is
>>> one of the arguments.
>> 
>> Matthew says the xa_store() is guaranteed not to do a memory
>> allocation in this case. However, they prefer an annotation
>> of the call site with a "0" GFP argument to show that the
>> allocation flags are not relevant.
>> 
>> Does this:
>> 
>> xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>> 
>> work for you?
> 
> Sure.
> And it looks like sparse will be happy even though "0" isn't explicitly
> "gfp_t" because 0 is "special".
> 
> I might prefer GFP_NULL or similar, but 0 certainly works for me.  I'll
> include that when I resend.

Matthew suggested GFP_NOALLOC. But neither of these symbolic
flags exist yet. I'd rather not hold up this series behind
the bikeshedding of the flag name ;-)


--
Chuck Lever



^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand
@ 2024-12-06  0:43 NeilBrown
  2024-12-06  0:43 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
                   ` (5 more replies)
  0 siblings, 6 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Changes from v2:
 - number of slots is increased more quickly.  Every time the highest-number
   slot is used, we increase by at least 20%.
 - when xa_store() is used in a context where we no that no allocation can
   happen, pass '0' as the GFP flag.
 - report target slots as well as total slots in /proc/fs/nfsd/clients/*/info

I've stay with reporting session information in the client info file
rather than creating a directory or using netlink.  I think the client
info file is simple and adequate.

I still haven't added support for CB_RECALL_SLOT.  While it is helpful,
it isn't essential.  I'll probably try to add it after the current
series lands.

Thanks,
NeilBrown



^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots
  2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  2024-12-06  0:43 ` [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC NeilBrown
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Using an xarray to store session slots will make it easier to change the
number of active slots based on demand, and removes an unnecessary
limit.

To achieve good throughput with a high-latency server it can be helpful
to have hundreds of concurrent writes, which means hundreds of slots.
So increase the limit to 2048 (twice what the Linux client will
currently use).  This limit is only a sanity check, not a hard limit.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 28 ++++++++++++++++++----------
 fs/nfsd/state.h     |  9 ++++++---
 2 files changed, 24 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 741b9449f727..aa4f1293d4d3 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1915,8 +1915,11 @@ free_session_slots(struct nfsd4_session *ses)
 	int i;
 
 	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
-		free_svc_cred(&ses->se_slots[i]->sl_cred);
-		kfree(ses->se_slots[i]);
+		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+		xa_erase(&ses->se_slots, i);
+		free_svc_cred(&slot->sl_cred);
+		kfree(slot);
 	}
 }
 
@@ -1996,17 +1999,20 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	struct nfsd4_session *new;
 	int i;
 
-	BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
-		     > PAGE_SIZE);
-
-	new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		return NULL;
+	xa_init(&new->se_slots);
 	/* allocate each struct nfsd4_slot and data cache in one piece */
 	for (i = 0; i < numslots; i++) {
-		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
-		if (!new->se_slots[i])
+		struct nfsd4_slot *slot;
+		slot = kzalloc(slotsize, GFP_KERNEL);
+		if (!slot)
 			goto out_free;
+		if (xa_is_err(xa_store(&new->se_slots, i, slot, GFP_KERNEL))) {
+			kfree(slot);
+			goto out_free;
+		}
 	}
 
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
@@ -2017,7 +2023,8 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	return new;
 out_free:
 	while (i--)
-		kfree(new->se_slots[i]);
+		kfree(xa_load(&new->se_slots, i));
+	xa_destroy(&new->se_slots);
 	kfree(new);
 	return NULL;
 }
@@ -2124,6 +2131,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 static void __free_session(struct nfsd4_session *ses)
 {
 	free_session_slots(ses);
+	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
 
@@ -4278,7 +4286,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out_put_session;
 
-	slot = session->se_slots[seq->slotid];
+	slot = xa_load(&session->se_slots, seq->slotid);
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	/* We do not negotiate the number of slots yet, so set the
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e16bb3717fb9..aad547d3ad8b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -227,8 +227,11 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 	return container_of(s, struct nfs4_delegation, dl_stid);
 }
 
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION     160
+/* Maximum number of slots per session.  This is for sanity-check only.
+ * It could be increased if we had a mechanism to shutdown misbehaving clients.
+ * A large number can be needed to get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION	2048
 /* Maximum  session per slot cache size */
 #define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -327,7 +330,7 @@ struct nfsd4_session {
 	struct nfsd4_cb_sec	se_cb_sec;
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
-	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
+	struct xarray		se_slots;	/* forward channel slots */
 };
 
 /* formatted contents of nfs4_sessionid */
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC
  2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
  2024-12-06  0:43 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  2024-12-06  0:43 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Rather than guessing how much space it might be safe to use for the DRC,
simply try allocating slots and be prepared to accept failure.

The first slot for each session is allocated with GFP_KERNEL which is
unlikely to fail.  Subsequent slots are allocated with the addition of
__GFP_NORETRY which is expected to fail if there isn't much free memory.

This is probably too aggressive but clears the way for adding a
shrinker interface to free extra slots when memory is tight.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 94 ++++++++-------------------------------------
 fs/nfsd/nfsd.h      |  3 --
 fs/nfsd/nfssvc.c    | 32 ---------------
 3 files changed, 16 insertions(+), 113 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index aa4f1293d4d3..808cb0d897d5 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1938,65 +1938,13 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
 	return size + sizeof(struct nfsd4_slot);
 }
 
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
-{
-	u32 slotsize = slot_bytes(ca);
-	u32 num = ca->maxreqs;
-	unsigned long avail, total_avail;
-	unsigned int scale_factor;
-
-	spin_lock(&nfsd_drc_lock);
-	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
-		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
-	else
-		/* We have handed out more space than we chose in
-		 * set_max_drc() to allow.  That isn't really a
-		 * problem as long as that doesn't make us think we
-		 * have lots more due to integer overflow.
-		 */
-		total_avail = 0;
-	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
-	/*
-	 * Never use more than a fraction of the remaining memory,
-	 * unless it's the only way to give this client a slot.
-	 * The chosen fraction is either 1/8 or 1/number of threads,
-	 * whichever is smaller.  This ensures there are adequate
-	 * slots to support multiple clients per thread.
-	 * Give the client one slot even if that would require
-	 * over-allocation--it is better than failure.
-	 */
-	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
-
-	avail = clamp_t(unsigned long, avail, slotsize,
-			total_avail/scale_factor);
-	num = min_t(int, num, avail / slotsize);
-	num = max_t(int, num, 1);
-	nfsd_drc_mem_used += num * slotsize;
-	spin_unlock(&nfsd_drc_lock);
-
-	return num;
-}
-
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
-	int slotsize = slot_bytes(ca);
-
-	spin_lock(&nfsd_drc_lock);
-	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
-	spin_unlock(&nfsd_drc_lock);
-}
-
 static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 					   struct nfsd4_channel_attrs *battrs)
 {
 	int numslots = fattrs->maxreqs;
 	int slotsize = slot_bytes(fattrs);
 	struct nfsd4_session *new;
+	struct nfsd4_slot *slot;
 	int i;
 
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
@@ -2004,17 +1952,21 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 		return NULL;
 	xa_init(&new->se_slots);
 	/* allocate each struct nfsd4_slot and data cache in one piece */
-	for (i = 0; i < numslots; i++) {
-		struct nfsd4_slot *slot;
-		slot = kzalloc(slotsize, GFP_KERNEL);
+	slot = kzalloc(slotsize, GFP_KERNEL);
+	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+		goto out_free;
+
+	for (i = 1; i < numslots; i++) {
+		const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
+		slot = kzalloc(slotsize, gfp);
 		if (!slot)
-			goto out_free;
-		if (xa_is_err(xa_store(&new->se_slots, i, slot, GFP_KERNEL))) {
+			break;
+		if (xa_is_err(xa_store(&new->se_slots, i, slot, gfp))) {
 			kfree(slot);
-			goto out_free;
+			break;
 		}
 	}
-
+	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
@@ -2022,8 +1974,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	spin_lock_init(&new->se_lock);
 	return new;
 out_free:
-	while (i--)
-		kfree(xa_load(&new->se_slots, i));
+	kfree(slot);
 	xa_destroy(&new->se_slots);
 	kfree(new);
 	return NULL;
@@ -2138,7 +2089,6 @@ static void __free_session(struct nfsd4_session *ses)
 static void free_session(struct nfsd4_session *ses)
 {
 	nfsd4_del_conns(ses);
-	nfsd4_put_drc_mem(&ses->se_fchannel);
 	__free_session(ses);
 }
 
@@ -3786,17 +3736,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
 	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
 			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
 	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
-	/*
-	 * Note decreasing slot size below client's request may make it
-	 * difficult for client to function correctly, whereas
-	 * decreasing the number of slots will (just?) affect
-	 * performance.  When short on memory we therefore prefer to
-	 * decrease number of slots instead of their size.  Clients that
-	 * request larger slots than they need will get poor results:
-	 * Note that we always allow at least one slot, because our
-	 * accounting is soft and provides no guarantees either way.
-	 */
-	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 
 	return nfs_ok;
 }
@@ -3874,11 +3813,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		return status;
 	status = check_backchannel_attrs(&cr_ses->back_channel);
 	if (status)
-		goto out_release_drc_mem;
+		goto out_err;
 	status = nfserr_jukebox;
 	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
 	if (!new)
-		goto out_release_drc_mem;
+		goto out_err;
 	conn = alloc_conn_from_crses(rqstp, cr_ses);
 	if (!conn)
 		goto out_free_session;
@@ -3987,8 +3926,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	free_conn(conn);
 out_free_session:
 	__free_session(new);
-out_release_drc_mem:
-	nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
 	return status;
 }
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 4b56ba1e8e48..3eb21e63b921 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -88,9 +88,6 @@ struct nfsd_genl_rqstp {
 extern struct svc_program	nfsd_programs[];
 extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
 extern struct mutex		nfsd_mutex;
-extern spinlock_t		nfsd_drc_lock;
-extern unsigned long		nfsd_drc_max_mem;
-extern unsigned long		nfsd_drc_mem_used;
 extern atomic_t			nfsd_th_cnt;		/* number of available threads */
 
 extern const struct seq_operations nfs_exports_op;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 49e2f32102ab..3dbaefc96608 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -70,16 +70,6 @@ static __be32			nfsd_init_request(struct svc_rqst *,
  */
 DEFINE_MUTEX(nfsd_mutex);
 
-/*
- * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
- * nfsd_drc_max_pages limits the total amount of memory available for
- * version 4.1 DRC caches.
- * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
- */
-DEFINE_SPINLOCK(nfsd_drc_lock);
-unsigned long	nfsd_drc_max_mem;
-unsigned long	nfsd_drc_mem_used;
-
 #if IS_ENABLED(CONFIG_NFS_LOCALIO)
 static const struct svc_version *localio_versions[] = {
 	[1] = &localio_version1,
@@ -575,27 +565,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
 		}
 }
 
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
-	#define NFSD_DRC_SIZE_SHIFT	7
-	nfsd_drc_max_mem = (nr_free_buffer_pages()
-					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
-	nfsd_drc_mem_used = 0;
-	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
 static int nfsd_get_default_max_blksize(void)
 {
 	struct sysinfo i;
@@ -678,7 +647,6 @@ int nfsd_create_serv(struct net *net)
 	nn->nfsd_serv = serv;
 	spin_unlock(&nfsd_notifier_lock);
 
-	set_max_drc();
 	/* check if the notifier is already set */
 	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
 		register_inetaddr_notifier(&nfsd_inetaddr_notifier);
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
  2024-12-06  0:43 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
  2024-12-06  0:43 ` [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  2024-12-06  0:43 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Each client now reports the number of slots allocated in each session.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 808cb0d897d5..67dfc699e411 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2643,6 +2643,7 @@ static const char *cb_state2str(int state)
 static int client_info_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = file_inode(m->file);
+	struct nfsd4_session *ses;
 	struct nfs4_client *clp;
 	u64 clid;
 
@@ -2679,6 +2680,13 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
 	seq_printf(m, "admin-revoked states: %d\n",
 		   atomic_read(&clp->cl_admin_revoked));
+	spin_lock(&clp->cl_lock);
+	seq_printf(m, "session slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	spin_unlock(&clp->cl_lock);
+	seq_puts(m, "\n");
+
 	drop_client(clp);
 
 	return 0;
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (2 preceding siblings ...)
  2024-12-06  0:43 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  2024-12-06  1:04   ` Jeff Layton
  2024-12-06 20:51   ` Chuck Lever
  2024-12-06  0:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
  2024-12-06  0:43 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
  5 siblings, 2 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

If a client ever uses the highest available slot for a given session,
attempt to allocate more slots so there is room for the client to use
them if wanted.  GFP_NOWAIT is used so if there is not plenty of
free memory, failure is expected - which is what we want.  It also
allows the allocation while holding a spinlock.

Each time we increase the number of slots by 20% (rounded up).  This
allows fairly quick growth while avoiding excessive over-shoot.

We would expect to stablise with around 10% more slots available than
the client actually uses.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 67dfc699e411..ec4468ebbd40 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	slot = xa_load(&session->se_slots, seq->slotid);
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
-	/* We do not negotiate the number of slots yet, so set the
-	 * maxslots to the session maxreqs which is used to encode
-	 * sr_highest_slotid and the sr_target_slot id to maxslots */
-	seq->maxslots = session->se_fchannel.maxreqs;
-
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
 	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
 					slot->sl_flags & NFSD4_SLOT_INUSE);
@@ -4289,6 +4284,41 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	cstate->session = session;
 	cstate->clp = clp;
 
+	/*
+	 * If the client ever uses the highest available slot,
+	 * gently try to allocate another 20%.  This allows
+	 * fairly quick growth without grossly over-shooting what
+	 * the client might use.
+	 */
+	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+		int s = session->se_fchannel.maxreqs;
+		int cnt = DIV_ROUND_UP(s, 5);
+
+		do {
+			/*
+			 * GFP_NOWAIT is a low-priority non-blocking
+			 * allocation which can be used under
+			 * client_lock and only succeeds if there is
+			 * plenty of memory.
+			 * Use GFP_ATOMIC which is higher priority for
+			 * xa_store() so we are less likely to waste the
+			 * effort of the first allocation.
+			 */
+			slot = kzalloc(slot_bytes(&session->se_fchannel),
+				       GFP_NOWAIT);
+			if (slot &&
+			    !xa_is_err(xa_store(&session->se_slots, s, slot,
+						GFP_ATOMIC | __GFP_NOWARN))) {
+				s += 1;
+				session->se_fchannel.maxreqs = s;
+			} else {
+				kfree(slot);
+			}
+		} while (slot && --cnt > 0);
+	}
+	seq->maxslots = session->se_fchannel.maxreqs;
+
 out:
 	switch (clp->cl_cb_state) {
 	case NFSD4_CB_DOWN:
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (3 preceding siblings ...)
  2024-12-06  0:43 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  2024-12-06  5:30   ` Jeff Layton
  2024-12-06  0:43 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
  5 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than we limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 8 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactive a slot we accept with the next seqid in
sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 80 +++++++++++++++++++++++++++++++++++++++------
 fs/nfsd/nfs4xdr.c   |  5 +--
 fs/nfsd/state.h     |  4 +++
 fs/nfsd/xdr4.h      |  2 --
 4 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ec4468ebbd40..e73668462739 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,54 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2722,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3715,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3727,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4279,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4304,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4320,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4341,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4307,17 +4359,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_ATOMIC | __GFP_NOWARN))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
+				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index aad547d3ad8b..74f2ab3c95aa 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -249,7 +249,9 @@ struct nfsd4_slot {
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
+	u8	sl_generation;
 	char	sl_data[];
 };
 
@@ -331,6 +333,8 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u8			se_slot_gen;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session
  2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (4 preceding siblings ...)
  2024-12-06  0:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  5 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Add a shrinker which frees unused slots and may ask the clients to use
fewer slots on each session.

We keep a global count of the number of freeable slots, which is the sum
of one less than the current "target" slots in all sessions in all
clients in all net-namespaces. This number is reported by the shrinker.

When the shrinker is asked to free some, we call xxx on each session in
a round-robin asking each to reduce the slot count by 1.  This will
reduce the "target" so the number reported by the shrinker will reduce
immediately.  The memory will only be freed later when the client
confirmed that it is no longer needed.

We use a global list of sessions and move the "head" to after the last
session that we asked to reduce, so the next callback from the shrinker
will move on to the next session.  This pressure should be applied
"evenly" across all sessions over time.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++---
 fs/nfsd/state.h     |  1 +
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e73668462739..d7bccc237027 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
  */
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session.  The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed.  The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
 static void
 free_session_slots(struct nfsd4_session *ses, int from)
 {
@@ -1930,11 +1940,14 @@ free_session_slots(struct nfsd4_session *ses, int from)
 		kfree(slot);
 	}
 	ses->se_fchannel.maxreqs = from;
-	if (ses->se_target_maxslots > from)
-		ses->se_target_maxslots = from;
+	if (ses->se_target_maxslots > from) {
+		int new_target = from ?: 1;
+		atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+		ses->se_target_maxslots = new_target;
+	}
 }
 
-static int __maybe_unused
+static int
 reduce_session_slots(struct nfsd4_session *ses, int dec)
 {
 	struct nfsd_net *nn = net_generic(ses->se_client->net,
@@ -1947,6 +1960,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec)
 		return ret;
 	ret = min(dec, ses->se_target_maxslots-1);
 	ses->se_target_maxslots -= ret;
+	atomic_sub(ret, &nfsd_total_target_slots);
 	ses->se_slot_gen += 1;
 	if (ses->se_slot_gen == 0) {
 		int i;
@@ -2006,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
 	new->se_target_maxslots = i;
+	atomic_add(i - 1, &nfsd_total_target_slots);
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2130,6 +2145,36 @@ static void free_session(struct nfsd4_session *ses)
 	__free_session(ses);
 }
 
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+	unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+	return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+	struct nfsd4_session *ses;
+	unsigned long scanned = 0;
+	unsigned long freed = 0;
+
+	spin_lock(&nfsd_session_list_lock);
+	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+		freed += reduce_session_slots(ses, 1);
+		scanned += 1;
+		if (scanned >= sc->nr_to_scan) {
+			/* Move starting point for next scan */
+			list_move(&nfsd_session_list, &ses->se_all_sessions);
+			break;
+		}
+	}
+	spin_unlock(&nfsd_session_list_lock);
+	sc->nr_scanned = scanned;
+	return freed;
+}
+
 static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
@@ -2154,6 +2199,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
 
+	spin_lock(&nfsd_session_list_lock);
+	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+	spin_unlock(&nfsd_session_list_lock);
+
 	{
 		struct sockaddr *sa = svc_addr(rqstp);
 		/*
@@ -2223,6 +2272,9 @@ unhash_session(struct nfsd4_session *ses)
 	spin_lock(&ses->se_client->cl_lock);
 	list_del(&ses->se_perclnt);
 	spin_unlock(&ses->se_client->cl_lock);
+	spin_lock(&nfsd_session_list_lock);
+	list_del(&ses->se_all_sessions);
+	spin_unlock(&nfsd_session_list_lock);
 }
 
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -4369,6 +4421,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 						GFP_ATOMIC | __GFP_NOWARN))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				atomic_add(s - session->se_target_maxslots,
+					   &nfsd_total_target_slots);
 				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
@@ -8765,7 +8819,6 @@ nfs4_state_start_net(struct net *net)
 }
 
 /* initialization to perform when the nfsd service is started: */
-
 int
 nfs4_state_start(void)
 {
@@ -8775,6 +8828,15 @@ nfs4_state_start(void)
 	if (ret)
 		return ret;
 
+	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+	if (!nfsd_slot_shrinker) {
+		rhltable_destroy(&nfs4_file_rhltable);
+		return -ENOMEM;
+	}
+	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+	shrinker_register(nfsd_slot_shrinker);
+
 	set_max_delegations();
 	return 0;
 }
@@ -8816,6 +8878,7 @@ void
 nfs4_state_shutdown(void)
 {
 	rhltable_destroy(&nfs4_file_rhltable);
+	shrinker_free(nfsd_slot_shrinker);
 }
 
 static void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 74f2ab3c95aa..a4ce2cf3d6a3 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -326,6 +326,7 @@ struct nfsd4_session {
 	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
+	struct list_head	se_all_sessions;/* global list of sessions */
 	struct nfs4_client	*se_client;
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-12-06  0:43 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
@ 2024-12-06  1:04   ` Jeff Layton
  2024-12-06  1:43     ` NeilBrown
  2024-12-06 20:51   ` Chuck Lever
  1 sibling, 1 reply; 33+ messages in thread
From: Jeff Layton @ 2024-12-06  1:04 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> If a client ever uses the highest available slot for a given session,
> attempt to allocate more slots so there is room for the client to use
> them if wanted.  GFP_NOWAIT is used so if there is not plenty of
> free memory, failure is expected - which is what we want.  It also
> allows the allocation while holding a spinlock.
> 
> Each time we increase the number of slots by 20% (rounded up).  This
> allows fairly quick growth while avoiding excessive over-shoot.
> 
> We would expect to stablise with around 10% more slots available than
> the client actually uses.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++-----
>  1 file changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 67dfc699e411..ec4468ebbd40 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	slot = xa_load(&session->se_slots, seq->slotid);
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
> -	/* We do not negotiate the number of slots yet, so set the
> -	 * maxslots to the session maxreqs which is used to encode
> -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> -	seq->maxslots = session->se_fchannel.maxreqs;
> -
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>  	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>  					slot->sl_flags & NFSD4_SLOT_INUSE);
> @@ -4289,6 +4284,41 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	cstate->session = session;
>  	cstate->clp = clp;
>  
> +	/*
> +	 * If the client ever uses the highest available slot,
> +	 * gently try to allocate another 20%.  This allows
> +	 * fairly quick growth without grossly over-shooting what
> +	 * the client might use.
> +	 */

20% seems like a reasonable place to start, but I do wonder if this
might need to be tunable under some workloads. Oh well, we can cross
that bridge if/when someone complains.
 
> +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> +		int s = session->se_fchannel.maxreqs;
> +		int cnt = DIV_ROUND_UP(s, 5);
> +
> +		do {
> +			/*
> +			 * GFP_NOWAIT is a low-priority non-blocking
> +			 * allocation which can be used under
> +			 * client_lock and only succeeds if there is
> +			 * plenty of memory.
> +			 * Use GFP_ATOMIC which is higher priority for
> +			 * xa_store() so we are less likely to waste the
> +			 * effort of the first allocation.
> +			 */

I don't know here. Why not just use GFP_NOWAIT for the xa_store too? If
we're so memory constrained that that fails, we're probably better off
releasing the slot.

> +			slot = kzalloc(slot_bytes(&session->se_fchannel),
> +				       GFP_NOWAIT);
> +			if (slot &&
> +			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> +						GFP_ATOMIC | __GFP_NOWARN))) {
> +				s += 1;
> +				session->se_fchannel.maxreqs = s;
> +			} else {
> +				kfree(slot);
> +			}
> +		} while (slot && --cnt > 0);
> +	}
> +	seq->maxslots = session->se_fchannel.maxreqs;
> +
>  out:
>  	switch (clp->cl_cb_state) {
>  	case NFSD4_CB_DOWN:

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-12-06  1:04   ` Jeff Layton
@ 2024-12-06  1:43     ` NeilBrown
  2024-12-06 13:49       ` Jeff Layton
  0 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-12-06  1:43 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 06 Dec 2024, Jeff Layton wrote:
> On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> > If a client ever uses the highest available slot for a given session,
> > attempt to allocate more slots so there is room for the client to use
> > them if wanted.  GFP_NOWAIT is used so if there is not plenty of
> > free memory, failure is expected - which is what we want.  It also
> > allows the allocation while holding a spinlock.
> > 
> > Each time we increase the number of slots by 20% (rounded up).  This
> > allows fairly quick growth while avoiding excessive over-shoot.
> > 
> > We would expect to stablise with around 10% more slots available than
> > the client actually uses.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++-----
> >  1 file changed, 35 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 67dfc699e411..ec4468ebbd40 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	slot = xa_load(&session->se_slots, seq->slotid);
> >  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >  
> > -	/* We do not negotiate the number of slots yet, so set the
> > -	 * maxslots to the session maxreqs which is used to encode
> > -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > -
> >  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> >  	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> >  					slot->sl_flags & NFSD4_SLOT_INUSE);
> > @@ -4289,6 +4284,41 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	cstate->session = session;
> >  	cstate->clp = clp;
> >  
> > +	/*
> > +	 * If the client ever uses the highest available slot,
> > +	 * gently try to allocate another 20%.  This allows
> > +	 * fairly quick growth without grossly over-shooting what
> > +	 * the client might use.
> > +	 */
> 
> 20% seems like a reasonable place to start, but I do wonder if this
> might need to be tunable under some workloads. Oh well, we can cross
> that bridge if/when someone complains.

I think that if we need a tunable, then it is a failure of design.
If?when someone complains we may well need to redesign.  I hope we could
avoid a tunable in that design!

>  
> > +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> > +		int s = session->se_fchannel.maxreqs;
> > +		int cnt = DIV_ROUND_UP(s, 5);
> > +
> > +		do {
> > +			/*
> > +			 * GFP_NOWAIT is a low-priority non-blocking
> > +			 * allocation which can be used under
> > +			 * client_lock and only succeeds if there is
> > +			 * plenty of memory.
> > +			 * Use GFP_ATOMIC which is higher priority for
> > +			 * xa_store() so we are less likely to waste the
> > +			 * effort of the first allocation.
> > +			 */
> 
> I don't know here. Why not just use GFP_NOWAIT for the xa_store too? If
> we're so memory constrained that that fails, we're probably better off
> releasing the slot.

Maybe.  I'm open simple using GFP_NOWAIT both places.
Most often xa_store won't need to allocate anything - it adds slots to
the array in batches (at least I assume it does - anything else would be
inefficient).  So it mostly won't matter.
So if seems at all inelegant - let's drop it.

Thanks,
NeilBrown


> 
> > +			slot = kzalloc(slot_bytes(&session->se_fchannel),
> > +				       GFP_NOWAIT);
> > +			if (slot &&
> > +			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> > +						GFP_ATOMIC | __GFP_NOWARN))) {
> > +				s += 1;
> > +				session->se_fchannel.maxreqs = s;
> > +			} else {
> > +				kfree(slot);
> > +			}
> > +		} while (slot && --cnt > 0);
> > +	}
> > +	seq->maxslots = session->se_fchannel.maxreqs;
> > +
> >  out:
> >  	switch (clp->cl_cb_state) {
> >  	case NFSD4_CB_DOWN:
> 
> -- 
> Jeff Layton <jlayton@kernel.org>
> 
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  0:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2024-12-06  5:30   ` Jeff Layton
  2024-12-06  6:05     ` NeilBrown
  0 siblings, 1 reply; 33+ messages in thread
From: Jeff Layton @ 2024-12-06  5:30 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 80 +++++++++++++++++++++++++++++++++++++++------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 77 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index ec4468ebbd40..e73668462739 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,54 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1968,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2081,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -2684,6 +2722,9 @@ static int client_info_show(struct seq_file *m, void *v)
>  	seq_printf(m, "session slots:");
>  	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>  		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> +	seq_printf(m, "\nsession target slots:");
> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> +		seq_printf(m, " %u", ses->se_target_maxslots);
>  	spin_unlock(&clp->cl_lock);
>  	seq_puts(m, "\n");
>  
> @@ -3674,10 +3715,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3686,6 +3727,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4236,8 +4279,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4262,6 +4304,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4272,9 +4320,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
> +	slot->sl_generation = session->se_slot_gen;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>  	else
> @@ -4291,9 +4341,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * the client might use.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
>  		int cnt = DIV_ROUND_UP(s, 5);
> +		void *prev_slot;
>  
>  		do {
>  			/*
> @@ -4307,17 +4359,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			 */
>  			slot = kzalloc(slot_bytes(&session->se_fchannel),
>  				       GFP_NOWAIT);
> +			prev_slot = xa_load(&session->se_slots, s);
> +			if (xa_is_value(prev_slot) && slot) {
> +				slot->sl_seqid = xa_to_value(prev_slot);
> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> +			}
>  			if (slot &&
>  			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>  						GFP_ATOMIC | __GFP_NOWARN))) {
>  				s += 1;
>  				session->se_fchannel.maxreqs = s;
> +				session->se_target_maxslots = s;
>  			} else {
>  				kfree(slot);
> +				slot = NULL;
>  			}
>  		} while (slot && --cnt > 0);
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 53fac037611c..4dcb03cd9292 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index aad547d3ad8b..74f2ab3c95aa 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -249,7 +249,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -331,6 +333,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  


I don't see where the above "#if 0" gets removed in patch 6. Shouldn't
it be?

While it makes for a larger patch, I think we'd be better served by
squashing 5 and 6 together. It doesn't make sense to add this core
infrastructure without something to call it.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  5:30   ` Jeff Layton
@ 2024-12-06  6:05     ` NeilBrown
  2024-12-06 13:59       ` Jeff Layton
  0 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-12-06  6:05 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 06 Dec 2024, Jeff Layton wrote:
> On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:

> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 382cc1389396..c26ba86dbdfd 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >  	u32			slotid;			/* request/response */
> >  	u32			maxslots;		/* request/response */
> >  	u32			cachethis;		/* request */
> > -#if 0
> >  	u32			target_maxslots;	/* response */
> > -#endif /* not yet */
> >  	u32			status_flags;		/* response */
> >  };
> >  
> 
> 
> I don't see where the above "#if 0" gets removed in patch 6. Shouldn't
> it be?

You are misreading.  It is being removed here in patch 5. 
It was added in 2.6.38 in 
Commit b85d4c01b76f ("nfsd41: sequence operation")


> 
> While it makes for a larger patch, I think we'd be better served by
> squashing 5 and 6 together. It doesn't make sense to add this core
> infrastructure without something to call it.

I find it easier to review if the distinct elements of functionality are
kept separate.  But if both you and Chuck want just one patch here, I
can do that.

Thanks,
NeilBrown


> -- 
> Jeff Layton <jlayton@kernel.org>
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-12-06  1:43     ` NeilBrown
@ 2024-12-06 13:49       ` Jeff Layton
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Layton @ 2024-12-06 13:49 UTC (permalink / raw)
  To: NeilBrown; +Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 2024-12-06 at 12:43 +1100, NeilBrown wrote:
> On Fri, 06 Dec 2024, Jeff Layton wrote:
> > On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> > > If a client ever uses the highest available slot for a given session,
> > > attempt to allocate more slots so there is room for the client to use
> > > them if wanted.  GFP_NOWAIT is used so if there is not plenty of
> > > free memory, failure is expected - which is what we want.  It also
> > > allows the allocation while holding a spinlock.
> > > 
> > > Each time we increase the number of slots by 20% (rounded up).  This
> > > allows fairly quick growth while avoiding excessive over-shoot.
> > > 
> > > We would expect to stablise with around 10% more slots available than
> > > the client actually uses.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 35 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 67dfc699e411..ec4468ebbd40 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >  	slot = xa_load(&session->se_slots, seq->slotid);
> > >  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> > >  
> > > -	/* We do not negotiate the number of slots yet, so set the
> > > -	 * maxslots to the session maxreqs which is used to encode
> > > -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> > > -	seq->maxslots = session->se_fchannel.maxreqs;
> > > -
> > >  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> > >  	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> > >  					slot->sl_flags & NFSD4_SLOT_INUSE);
> > > @@ -4289,6 +4284,41 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > >  	cstate->session = session;
> > >  	cstate->clp = clp;
> > >  
> > > +	/*
> > > +	 * If the client ever uses the highest available slot,
> > > +	 * gently try to allocate another 20%.  This allows
> > > +	 * fairly quick growth without grossly over-shooting what
> > > +	 * the client might use.
> > > +	 */
> > 
> > 20% seems like a reasonable place to start, but I do wonder if this
> > might need to be tunable under some workloads. Oh well, we can cross
> > that bridge if/when someone complains.
> 
> I think that if we need a tunable, then it is a failure of design.
> If?when someone complains we may well need to redesign.  I hope we could
> avoid a tunable in that design!
> 

I hope so too.

> >  
> > > +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > > +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> > > +		int s = session->se_fchannel.maxreqs;
> > > +		int cnt = DIV_ROUND_UP(s, 5);
> > > +
> > > +		do {
> > > +			/*
> > > +			 * GFP_NOWAIT is a low-priority non-blocking
> > > +			 * allocation which can be used under
> > > +			 * client_lock and only succeeds if there is
> > > +			 * plenty of memory.
> > > +			 * Use GFP_ATOMIC which is higher priority for
> > > +			 * xa_store() so we are less likely to waste the
> > > +			 * effort of the first allocation.
> > > +			 */
> > 
> > I don't know here. Why not just use GFP_NOWAIT for the xa_store too? If
> > we're so memory constrained that that fails, we're probably better off
> > releasing the slot.
> 
> Maybe.  I'm open simple using GFP_NOWAIT both places.
> Most often xa_store won't need to allocate anything - it adds slots to
> the array in batches (at least I assume it does - anything else would be
> inefficient).  So it mostly won't matter.
> So if seems at all inelegant - let's drop it.
> 
> 

I'd prefer we drop that part. It probably won't matter much in the long
run anyway.

> 
> > 
> > > +			slot = kzalloc(slot_bytes(&session->se_fchannel),
> > > +				       GFP_NOWAIT);
> > > +			if (slot &&
> > > +			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> > > +						GFP_ATOMIC | __GFP_NOWARN))) {
> > > +				s += 1;
> > > +				session->se_fchannel.maxreqs = s;
> > > +			} else {
> > > +				kfree(slot);
> > > +			}
> > > +		} while (slot && --cnt > 0);
> > > +	}
> > > +	seq->maxslots = session->se_fchannel.maxreqs;
> > > +
> > >  out:
> > >  	switch (clp->cl_cb_state) {
> > >  	case NFSD4_CB_DOWN:
> > 
> > -- 
> > Jeff Layton <jlayton@kernel.org>
> > 
> > 
> 

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  6:05     ` NeilBrown
@ 2024-12-06 13:59       ` Jeff Layton
  0 siblings, 0 replies; 33+ messages in thread
From: Jeff Layton @ 2024-12-06 13:59 UTC (permalink / raw)
  To: NeilBrown; +Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 2024-12-06 at 17:05 +1100, NeilBrown wrote:
> On Fri, 06 Dec 2024, Jeff Layton wrote:
> > On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> 
> > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > > index 382cc1389396..c26ba86dbdfd 100644
> > > --- a/fs/nfsd/xdr4.h
> > > +++ b/fs/nfsd/xdr4.h
> > > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> > >  	u32			slotid;			/* request/response */
> > >  	u32			maxslots;		/* request/response */
> > >  	u32			cachethis;		/* request */
> > > -#if 0
> > >  	u32			target_maxslots;	/* response */
> > > -#endif /* not yet */
> > >  	u32			status_flags;		/* response */
> > >  };
> > >  
> > 
> > 
> > I don't see where the above "#if 0" gets removed in patch 6. Shouldn't
> > it be?
> 
> You are misreading.  It is being removed here in patch 5. 
> It was added in 2.6.38 in 
> Commit b85d4c01b76f ("nfsd41: sequence operation")
> 

Oh, sorry -- my mistake. That's what I get for reviewing patches just
before boarding a redeye flight!

> 
> > 
> > While it makes for a larger patch, I think we'd be better served by
> > squashing 5 and 6 together. It doesn't make sense to add this core
> > infrastructure without something to call it.
> 
> I find it easier to review if the distinct elements of functionality are
> kept separate.  But if both you and Chuck want just one patch here, I
> can do that.
> 

The proposed code is bisectable, so I don't feel too strongly about it.
Adding in unused functions is "Not The Way We (Usually) Do Things"
though.

I think in this case it was harder for me to review, since I had to
skip ahead to patch #6 to see how reduce_session_slots() would actually
be used. The spin_trylock(), in particular was confusing until I
realized it was being called from a shrinker that iterated over all of
the clients and spinning there is probably not good.

Either way, a kerneldoc header over reduce_session_slots() that
explains this subtlety would be nice.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-12-06  0:43 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
  2024-12-06  1:04   ` Jeff Layton
@ 2024-12-06 20:51   ` Chuck Lever
  2024-12-08  4:52     ` NeilBrown
  1 sibling, 1 reply; 33+ messages in thread
From: Chuck Lever @ 2024-12-06 20:51 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 12/5/24 7:43 PM, NeilBrown wrote:
> If a client ever uses the highest available slot for a given session,
> attempt to allocate more slots so there is room for the client to use
> them if wanted.  GFP_NOWAIT is used so if there is not plenty of
> free memory, failure is expected - which is what we want.  It also
> allows the allocation while holding a spinlock.
> 
> Each time we increase the number of slots by 20% (rounded up).  This
> allows fairly quick growth while avoiding excessive over-shoot.
> 
> We would expect to stablise with around 10% more slots available than
> the client actually uses.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>   fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++-----
>   1 file changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 67dfc699e411..ec4468ebbd40 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	slot = xa_load(&session->se_slots, seq->slotid);
>   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>   
> -	/* We do not negotiate the number of slots yet, so set the
> -	 * maxslots to the session maxreqs which is used to encode
> -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> -	seq->maxslots = session->se_fchannel.maxreqs;
> -
>   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>   	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>   					slot->sl_flags & NFSD4_SLOT_INUSE);
> @@ -4289,6 +4284,41 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	cstate->session = session;
>   	cstate->clp = clp;
>   
> +	/*
> +	 * If the client ever uses the highest available slot,
> +	 * gently try to allocate another 20%.  This allows
> +	 * fairly quick growth without grossly over-shooting what
> +	 * the client might use.
> +	 */
> +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> +		int s = session->se_fchannel.maxreqs;
> +		int cnt = DIV_ROUND_UP(s, 5);
> +
> +		do {
> +			/*
> +			 * GFP_NOWAIT is a low-priority non-blocking
> +			 * allocation which can be used under
> +			 * client_lock and only succeeds if there is
> +			 * plenty of memory.
> +			 * Use GFP_ATOMIC which is higher priority for
> +			 * xa_store() so we are less likely to waste the
> +			 * effort of the first allocation.
> +			 */
> +			slot = kzalloc(slot_bytes(&session->se_fchannel),
> +				       GFP_NOWAIT);
> +			if (slot &&
> +			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> +						GFP_ATOMIC | __GFP_NOWARN))) {
> +				s += 1;
> +				session->se_fchannel.maxreqs = s;
> +			} else {
> +				kfree(slot);

Don't you want to break out of this loop if slot allocation or the
xa_store() fails? Does the session logic work if there is a gap
of unallocated slots in the slot table? Seems like we want to wait
a bit anyway after an allocation failure before asking again.

Otherwise, LGTM. I assume a v4 is forthcoming to address review
comments.


> +			}
> +		} while (slot && --cnt > 0);
> +	}
> +	seq->maxslots = session->se_fchannel.maxreqs;
> +
>   out:
>   	switch (clp->cl_cb_state) {
>   	case NFSD4_CB_DOWN:


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-12-06 20:51   ` Chuck Lever
@ 2024-12-08  4:52     ` NeilBrown
  0 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-08  4:52 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Sat, 07 Dec 2024, Chuck Lever wrote:
> On 12/5/24 7:43 PM, NeilBrown wrote:
> > If a client ever uses the highest available slot for a given session,
> > attempt to allocate more slots so there is room for the client to use
> > them if wanted.  GFP_NOWAIT is used so if there is not plenty of
> > free memory, failure is expected - which is what we want.  It also
> > allows the allocation while holding a spinlock.
> > 
> > Each time we increase the number of slots by 20% (rounded up).  This
> > allows fairly quick growth while avoiding excessive over-shoot.
> > 
> > We would expect to stablise with around 10% more slots available than
> > the client actually uses.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >   fs/nfsd/nfs4state.c | 40 +++++++++++++++++++++++++++++++++++-----
> >   1 file changed, 35 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 67dfc699e411..ec4468ebbd40 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -4235,11 +4235,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	slot = xa_load(&session->se_slots, seq->slotid);
> >   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >   
> > -	/* We do not negotiate the number of slots yet, so set the
> > -	 * maxslots to the session maxreqs which is used to encode
> > -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > -
> >   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> >   	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> >   					slot->sl_flags & NFSD4_SLOT_INUSE);
> > @@ -4289,6 +4284,41 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	cstate->session = session;
> >   	cstate->clp = clp;
> >   
> > +	/*
> > +	 * If the client ever uses the highest available slot,
> > +	 * gently try to allocate another 20%.  This allows
> > +	 * fairly quick growth without grossly over-shooting what
> > +	 * the client might use.
> > +	 */
> > +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> > +		int s = session->se_fchannel.maxreqs;
> > +		int cnt = DIV_ROUND_UP(s, 5);
> > +
> > +		do {
> > +			/*
> > +			 * GFP_NOWAIT is a low-priority non-blocking
> > +			 * allocation which can be used under
> > +			 * client_lock and only succeeds if there is
> > +			 * plenty of memory.
> > +			 * Use GFP_ATOMIC which is higher priority for
> > +			 * xa_store() so we are less likely to waste the
> > +			 * effort of the first allocation.
> > +			 */
> > +			slot = kzalloc(slot_bytes(&session->se_fchannel),
> > +				       GFP_NOWAIT);
> > +			if (slot &&
> > +			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> > +						GFP_ATOMIC | __GFP_NOWARN))) {
> > +				s += 1;
> > +				session->se_fchannel.maxreqs = s;
> > +			} else {
> > +				kfree(slot);
> 
> Don't you want to break out of this loop if slot allocation or the
> xa_store() fails? Does the session logic work if there is a gap
> of unallocated slots in the slot table? Seems like we want to wait
> a bit anyway after an allocation failure before asking again.

Indeed!  The "slot = NULL" which the next patch adds should be in this
patch.  That makes the loop abort.

> 
> Otherwise, LGTM. I assume a v4 is forthcoming to address review
> comments.

I'll send that out Monday morning.

Thanks,
NeilBrown

> 
> 
> > +			}
> > +		} while (slot && --cnt > 0);
> > +	}
> > +	seq->maxslots = session->se_fchannel.maxreqs;
> > +
> >   out:
> >   	switch (clp->cl_cb_state) {
> >   	case NFSD4_CB_DOWN:
> 
> 
> -- 
> Chuck Lever
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-08 22:43 [PATCH 0/6 v4] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-12-08 22:43 ` NeilBrown
  0 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2024-12-08 22:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than we limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 16 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactive a slot we accept with the next seqid in
sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

se_dead is moved in struct nfsd4_session to remove a hole.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c   |  5 ++-
 fs/nfsd/state.h     |  6 ++-
 fs/nfsd/xdr4.h      |  2 -
 4 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fd9473d487f3..a2d1f97b8a0e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses:  The session to affect
+ * @dec:  how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock.  As shrinkers are
+ * best-effort, skiping a session is client_lock is already held has no
+ * great coast
+ *
+ * Return value:
+ *   The number of slots that the target was reduced by.
+ */
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_NOWAIT))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
 				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index aad547d3ad8b..4251ff3c5ad1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -245,10 +245,12 @@ struct nfsd4_slot {
 	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
+	u16	sl_generation;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -321,7 +323,6 @@ struct nfsd4_session {
 	u32			se_cb_slot_avail; /* bitmap of available slots */
 	u32			se_cb_highest_slot;	/* highest slot client wants */
 	u32			se_cb_prog;
-	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
 	struct nfs4_client	*se_client;
@@ -331,6 +332,9 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u16			se_slot_gen;
+	bool			se_dead;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-11 21:47 [PATCH 0/6 v5] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-12-11 21:47 ` NeilBrown
  2025-01-19  2:01   ` Chuck Lever
  0 siblings, 1 reply; 33+ messages in thread
From: NeilBrown @ 2024-12-11 21:47 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than we limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 16 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactive a slot we accept with the next seqid in
sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

se_dead is moved in struct nfsd4_session to remove a hole.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c   |  5 ++-
 fs/nfsd/state.h     |  6 ++-
 fs/nfsd/xdr4.h      |  2 -
 4 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fd9473d487f3..a2d1f97b8a0e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses:  The session to affect
+ * @dec:  how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock.  As shrinkers are
+ * best-effort, skiping a session is client_lock is already held has no
+ * great coast
+ *
+ * Return value:
+ *   The number of slots that the target was reduced by.
+ */
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_NOWAIT))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
 				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index aad547d3ad8b..4251ff3c5ad1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -245,10 +245,12 @@ struct nfsd4_slot {
 	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
+	u16	sl_generation;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -321,7 +323,6 @@ struct nfsd4_session {
 	u32			se_cb_slot_avail; /* bitmap of available slots */
 	u32			se_cb_highest_slot;	/* highest slot client wants */
 	u32			se_cb_prog;
-	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
 	struct nfs4_client	*se_client;
@@ -331,6 +332,9 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u16			se_slot_gen;
+	bool			se_dead;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-11 21:47 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2025-01-19  2:01   ` Chuck Lever
  2025-01-21  2:36     ` NeilBrown
  2025-01-27  4:08     ` NeilBrown
  0 siblings, 2 replies; 33+ messages in thread
From: Chuck Lever @ 2025-01-19  2:01 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 12/11/24 4:47 PM, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 16 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> se_dead is moved in struct nfsd4_session to remove a hole.
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>   fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
>   fs/nfsd/nfs4xdr.c   |  5 ++-
>   fs/nfsd/state.h     |  6 ++-
>   fs/nfsd/xdr4.h      |  2 -
>   4 files changed, 92 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fd9473d487f3..a2d1f97b8a0e 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
>   #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>   
>   static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>   {
>   	int i;
>   
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>   		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>   
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>   		free_svc_cred(&slot->sl_cred);
>   		kfree(slot);
>   	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +/**
> + * reduce_session_slots - reduce the target max-slots of a session if possible
> + * @ses:  The session to affect
> + * @dec:  how much to decrease the target by
> + *
> + * This interface can be used by a shrinker to reduce the target max-slots
> + * for a session so that some slots can eventually be freed.
> + * It uses spin_trylock() as it may be called in a context where another
> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> + * best-effort, skiping a session is client_lock is already held has no
> + * great coast
> + *
> + * Return value:
> + *   The number of slots that the target was reduced by.
> + */
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>   }
>   
>   /*
> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>   	}
>   	fattrs->maxreqs = i;
>   	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>   	new->se_cb_slot_avail = ~0U;
>   	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>   				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>   
>   static void __free_session(struct nfsd4_session *ses)
>   {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>   	xa_destroy(&ses->se_slots);
>   	kfree(ses);
>   }
> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
>   	seq_printf(m, "session slots:");
>   	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>   		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> +	seq_printf(m, "\nsession target slots:");
> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> +		seq_printf(m, " %u", ses->se_target_maxslots);
>   	spin_unlock(&clp->cl_lock);
>   	seq_puts(m, "\n");
>   
> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>   	kfree(exid->server_impl_name);
>   }
>   
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>   {
>   	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>   		if (seqid == slot_seqid)
>   			return nfserr_jukebox;
>   		else
> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>   	/* Note unsigned 32-bit arithmetic handles wraparound: */
>   	if (likely(seqid == slot_seqid + 1))
>   		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>   	if (seqid == slot_seqid)
>   		return nfserr_replay_cache;
>   	return nfserr_seq_misordered;
> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>   
>   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>   	if (status == nfserr_replay_cache) {
>   		status = nfserr_seq_misordered;
>   		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	if (status)
>   		goto out_put_session;
>   
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>   	buflen = (seq->cachethis) ?
>   			session->se_fchannel.maxresp_cached :
>   			session->se_fchannel.maxresp_sz;
> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	svc_reserve(rqstp, buflen);
>   
>   	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>   	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>   	slot->sl_flags |= NFSD4_SLOT_INUSE;
> +	slot->sl_generation = session->se_slot_gen;
>   	if (seq->cachethis)
>   		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>   	else
> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	 * the client might use.
>   	 */
>   	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>   	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>   		int s = session->se_fchannel.maxreqs;
>   		int cnt = DIV_ROUND_UP(s, 5);
> +		void *prev_slot;
>   
>   		do {
>   			/*
> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   			 */
>   			slot = kzalloc(slot_bytes(&session->se_fchannel),
>   				       GFP_NOWAIT);
> +			prev_slot = xa_load(&session->se_slots, s);
> +			if (xa_is_value(prev_slot) && slot) {
> +				slot->sl_seqid = xa_to_value(prev_slot);
> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> +			}
>   			if (slot &&
>   			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>   						GFP_NOWAIT))) {
>   				s += 1;
>   				session->se_fchannel.maxreqs = s;
> +				session->se_target_maxslots = s;
>   			} else {
>   				kfree(slot);
>   				slot = NULL;
>   			}
>   		} while (slot && --cnt > 0);
>   	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>   
>   out:
>   	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 53fac037611c..4dcb03cd9292 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>   		return nfserr_bad_xdr;
>   	seq->seqid = be32_to_cpup(p++);
>   	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>   	seq->cachethis = be32_to_cpup(p);
>   
>   	seq->status_flags = 0;
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index aad547d3ad8b..4251ff3c5ad1 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -245,10 +245,12 @@ struct nfsd4_slot {
>   	struct svc_cred sl_cred;
>   	u32	sl_datalen;
>   	u16	sl_opcnt;
> +	u16	sl_generation;
>   #define NFSD4_SLOT_INUSE	(1 << 0)
>   #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>   #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>   #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>   	u8	sl_flags;
>   	char	sl_data[];
>   };
> @@ -321,7 +323,6 @@ struct nfsd4_session {
>   	u32			se_cb_slot_avail; /* bitmap of available slots */
>   	u32			se_cb_highest_slot;	/* highest slot client wants */
>   	u32			se_cb_prog;
> -	bool			se_dead;
>   	struct list_head	se_hash;	/* hash by sessionid */
>   	struct list_head	se_perclnt;
>   	struct nfs4_client	*se_client;
> @@ -331,6 +332,9 @@ struct nfsd4_session {
>   	struct list_head	se_conns;
>   	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>   	struct xarray		se_slots;	/* forward channel slots */
> +	u16			se_slot_gen;
> +	bool			se_dead;
> +	u32			se_target_maxslots;
>   };
>   
>   /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>   	u32			slotid;			/* request/response */
>   	u32			maxslots;		/* request/response */
>   	u32			cachethis;		/* request */
> -#if 0
>   	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>   	u32			status_flags;		/* response */
>   };
>   

Hi Neil -

I've found some misbehavior which I've bisected to this commit.

If disconnect injection is set up to break the connection every 25,000
RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
after this commit is applied.

Network capture shows that the server eventually starts returning
SEQ_MISORDERED because the client has forgotten an retired slot after a
disconnect, and tries to use sequence number 1 for that slot with a new
operation.

I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
client. This function uses a bit of calculus to decide when to bump the
slot table's generation number. In the failing case, it never bumps the
generation, and that results in the client freeing slots it shouldn't.
The server's reported { highest, target_highest } slot numbers don't
appear to change correctly after the client has reconnected.

If I revert this hunk from 5/6:

@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres 
*resp, __be32 nfserr,
  	if (nfserr != nfs_ok)
  		return nfserr;
  	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
  	if (nfserr != nfs_ok)
  		return nfserr;
  	/* sr_status_flags */

the reproducer above runs to completion in the expected amount of time.


The high order bit here is whether I should drop these patches for
v6.14, or whether you believe you can come up with a narrow solution
during the early part of v6.14-rc that I can include in an -rc update
for NFSD. I can't really tell if a significant amount of surgery will
be necessary.

What do you think?


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-19  2:01   ` Chuck Lever
@ 2025-01-21  2:36     ` NeilBrown
  2025-01-21 16:24       ` Chuck Lever
  2025-01-27  4:08     ` NeilBrown
  1 sibling, 1 reply; 33+ messages in thread
From: NeilBrown @ 2025-01-21  2:36 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Sun, 19 Jan 2025, Chuck Lever wrote:
> On 12/11/24 4:47 PM, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than we limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use an 16 bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactive a slot we accept with the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > se_dead is moved in struct nfsd4_session to remove a hole.
> > 
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >   fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
> >   fs/nfsd/nfs4xdr.c   |  5 ++-
> >   fs/nfsd/state.h     |  6 ++-
> >   fs/nfsd/xdr4.h      |  2 -
> >   4 files changed, 92 insertions(+), 15 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fd9473d487f3..a2d1f97b8a0e 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
> >   #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >   
> >   static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >   {
> >   	int i;
> >   
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >   		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >   
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> >   		free_svc_cred(&slot->sl_cred);
> >   		kfree(slot);
> >   	}
> > +	ses->se_fchannel.maxreqs = from;
> > +	if (ses->se_target_maxslots > from)
> > +		ses->se_target_maxslots = from;
> > +}
> > +
> > +/**
> > + * reduce_session_slots - reduce the target max-slots of a session if possible
> > + * @ses:  The session to affect
> > + * @dec:  how much to decrease the target by
> > + *
> > + * This interface can be used by a shrinker to reduce the target max-slots
> > + * for a session so that some slots can eventually be freed.
> > + * It uses spin_trylock() as it may be called in a context where another
> > + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> > + * best-effort, skiping a session is client_lock is already held has no
> > + * great coast
> > + *
> > + * Return value:
> > + *   The number of slots that the target was reduced by.
> > + */
> > +static int __maybe_unused
> > +reduce_session_slots(struct nfsd4_session *ses, int dec)
> > +{
> > +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> > +					  nfsd_net_id);
> > +	int ret = 0;
> > +
> > +	if (ses->se_target_maxslots <= 1)
> > +		return ret;
> > +	if (!spin_trylock(&nn->client_lock))
> > +		return ret;
> > +	ret = min(dec, ses->se_target_maxslots-1);
> > +	ses->se_target_maxslots -= ret;
> > +	ses->se_slot_gen += 1;
> > +	if (ses->se_slot_gen == 0) {
> > +		int i;
> > +		ses->se_slot_gen = 1;
> > +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > +			slot->sl_generation = 0;
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	return ret;
> >   }
> >   
> >   /*
> > @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >   	}
> >   	fattrs->maxreqs = i;
> >   	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> > +	new->se_target_maxslots = i;
> >   	new->se_cb_slot_avail = ~0U;
> >   	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
> >   				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> > @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
> >   
> >   static void __free_session(struct nfsd4_session *ses)
> >   {
> > -	free_session_slots(ses);
> > +	free_session_slots(ses, 0);
> >   	xa_destroy(&ses->se_slots);
> >   	kfree(ses);
> >   }
> > @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
> >   	seq_printf(m, "session slots:");
> >   	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >   		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > +	seq_printf(m, "\nsession target slots:");
> > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > +		seq_printf(m, " %u", ses->se_target_maxslots);
> >   	spin_unlock(&clp->cl_lock);
> >   	seq_puts(m, "\n");
> >   
> > @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
> >   	kfree(exid->server_impl_name);
> >   }
> >   
> > -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> > +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
> >   {
> >   	/* The slot is in use, and no response has been sent. */
> > -	if (slot_inuse) {
> > +	if (flags & NFSD4_SLOT_INUSE) {
> >   		if (seqid == slot_seqid)
> >   			return nfserr_jukebox;
> >   		else
> > @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >   	/* Note unsigned 32-bit arithmetic handles wraparound: */
> >   	if (likely(seqid == slot_seqid + 1))
> >   		return nfs_ok;
> > +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> > +		return nfs_ok;
> >   	if (seqid == slot_seqid)
> >   		return nfserr_replay_cache;
> >   	return nfserr_seq_misordered;
> > @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >   
> >   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> > -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> > -					slot->sl_flags & NFSD4_SLOT_INUSE);
> > +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
> >   	if (status == nfserr_replay_cache) {
> >   		status = nfserr_seq_misordered;
> >   		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> > @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	if (status)
> >   		goto out_put_session;
> >   
> > +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> > +	    slot->sl_generation == session->se_slot_gen &&
> > +	    seq->maxslots <= session->se_target_maxslots)
> > +		/* Client acknowledged our reduce maxreqs */
> > +		free_session_slots(session, session->se_target_maxslots);
> > +
> >   	buflen = (seq->cachethis) ?
> >   			session->se_fchannel.maxresp_cached :
> >   			session->se_fchannel.maxresp_sz;
> > @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	svc_reserve(rqstp, buflen);
> >   
> >   	status = nfs_ok;
> > -	/* Success! bump slot seqid */
> > +	/* Success! accept new slot seqid */
> >   	slot->sl_seqid = seq->seqid;
> > +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
> >   	slot->sl_flags |= NFSD4_SLOT_INUSE;
> > +	slot->sl_generation = session->se_slot_gen;
> >   	if (seq->cachethis)
> >   		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> >   	else
> > @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	 * the client might use.
> >   	 */
> >   	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
> >   	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> >   		int s = session->se_fchannel.maxreqs;
> >   		int cnt = DIV_ROUND_UP(s, 5);
> > +		void *prev_slot;
> >   
> >   		do {
> >   			/*
> > @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   			 */
> >   			slot = kzalloc(slot_bytes(&session->se_fchannel),
> >   				       GFP_NOWAIT);
> > +			prev_slot = xa_load(&session->se_slots, s);
> > +			if (xa_is_value(prev_slot) && slot) {
> > +				slot->sl_seqid = xa_to_value(prev_slot);
> > +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> > +			}
> >   			if (slot &&
> >   			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> >   						GFP_NOWAIT))) {
> >   				s += 1;
> >   				session->se_fchannel.maxreqs = s;
> > +				session->se_target_maxslots = s;
> >   			} else {
> >   				kfree(slot);
> >   				slot = NULL;
> >   			}
> >   		} while (slot && --cnt > 0);
> >   	}
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> > +	seq->target_maxslots = session->se_target_maxslots;
> >   
> >   out:
> >   	switch (clp->cl_cb_state) {
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 53fac037611c..4dcb03cd9292 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> >   		return nfserr_bad_xdr;
> >   	seq->seqid = be32_to_cpup(p++);
> >   	seq->slotid = be32_to_cpup(p++);
> > -	seq->maxslots = be32_to_cpup(p++);
> > +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> > +	seq->maxslots = be32_to_cpup(p++) + 1;
> >   	seq->cachethis = be32_to_cpup(p);
> >   
> >   	seq->status_flags = 0;
> > @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_target_highest_slotid */
> > -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> > +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_status_flags */
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index aad547d3ad8b..4251ff3c5ad1 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -245,10 +245,12 @@ struct nfsd4_slot {
> >   	struct svc_cred sl_cred;
> >   	u32	sl_datalen;
> >   	u16	sl_opcnt;
> > +	u16	sl_generation;
> >   #define NFSD4_SLOT_INUSE	(1 << 0)
> >   #define NFSD4_SLOT_CACHETHIS	(1 << 1)
> >   #define NFSD4_SLOT_INITIALIZED	(1 << 2)
> >   #define NFSD4_SLOT_CACHED	(1 << 3)
> > +#define NFSD4_SLOT_REUSED	(1 << 4)
> >   	u8	sl_flags;
> >   	char	sl_data[];
> >   };
> > @@ -321,7 +323,6 @@ struct nfsd4_session {
> >   	u32			se_cb_slot_avail; /* bitmap of available slots */
> >   	u32			se_cb_highest_slot;	/* highest slot client wants */
> >   	u32			se_cb_prog;
> > -	bool			se_dead;
> >   	struct list_head	se_hash;	/* hash by sessionid */
> >   	struct list_head	se_perclnt;
> >   	struct nfs4_client	*se_client;
> > @@ -331,6 +332,9 @@ struct nfsd4_session {
> >   	struct list_head	se_conns;
> >   	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
> >   	struct xarray		se_slots;	/* forward channel slots */
> > +	u16			se_slot_gen;
> > +	bool			se_dead;
> > +	u32			se_target_maxslots;
> >   };
> >   
> >   /* formatted contents of nfs4_sessionid */
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 382cc1389396..c26ba86dbdfd 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >   	u32			slotid;			/* request/response */
> >   	u32			maxslots;		/* request/response */
> >   	u32			cachethis;		/* request */
> > -#if 0
> >   	u32			target_maxslots;	/* response */
> > -#endif /* not yet */
> >   	u32			status_flags;		/* response */
> >   };
> >   
> 
> Hi Neil -
> 
> I've found some misbehavior which I've bisected to this commit.
> 
> If disconnect injection is set up to break the connection every 25,000
> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
> after this commit is applied.
> 
> Network capture shows that the server eventually starts returning
> SEQ_MISORDERED because the client has forgotten an retired slot after a
> disconnect, and tries to use sequence number 1 for that slot with a new
> operation.
> 
> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
> client. This function uses a bit of calculus to decide when to bump the
> slot table's generation number. In the failing case, it never bumps the
> generation, and that results in the client freeing slots it shouldn't.
> The server's reported { highest, target_highest } slot numbers don't
> appear to change correctly after the client has reconnected.
> 
> If I revert this hunk from 5/6:
> 
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres 
> *resp, __be32 nfserr,
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_status_flags */
> 
> the reproducer above runs to completion in the expected amount of time.
> 
> 
> The high order bit here is whether I should drop these patches for
> v6.14, or whether you believe you can come up with a narrow solution
> during the early part of v6.14-rc that I can include in an -rc update
> for NFSD. I can't really tell if a significant amount of surgery will
> be necessary.
> 
> What do you think?

I think I can fix it.

It sounds like it might be a client bug, or it might be a difference in
interpretation of the spec, or it might be an ambiguity in the spec.

But I think I can fix it by setting NFSD_SLOT_REUSED in any slots
beyond ->target_maxslots when I reduce target_maxslots.  That makes it
so that we accept either 1 or the next-in-sequence seqid.
Doing that does open up a possible risk of a resend being accepted as a
new request.  As it is a new connection, resends are a real possibility.
I'll have to ponder that a bit more and might need to be careful about
retiring slots with a low seqid which have been used recently.  Maybe I
need to store the time when seqid 1 was used, and not return a slot
until some minimum time after that timestamp.

I'll try to get you a patch before the end of next week.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-21  2:36     ` NeilBrown
@ 2025-01-21 16:24       ` Chuck Lever
  0 siblings, 0 replies; 33+ messages in thread
From: Chuck Lever @ 2025-01-21 16:24 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 1/20/25 9:36 PM, NeilBrown wrote:
> On Sun, 19 Jan 2025, Chuck Lever wrote:
>> On 12/11/24 4:47 PM, NeilBrown wrote:
>>> Reducing the number of slots in the session slot table requires
>>> confirmation from the client.  This patch adds reduce_session_slots()
>>> which starts the process of getting confirmation, but never calls it.
>>> That will come in a later patch.
>>>
>>> Before we can free a slot we need to confirm that the client won't try
>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>> is not larger than we limit we are trying to impose.  So for each slot
>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>
>>> To achieve this we introduce a concept of request "generations".  Each
>>> time we decide to reduce cr_maxrequests we increment the generation
>>> number, and record this when we return the lower cr_maxrequests to the
>>> client.  When a slot with the current generation reports a low
>>> ca_maxrequests, we commit to that level and free extra slots.
>>>
>>> We use an 16 bit generation number (64 seems wasteful) and if it cycles
>>> we iterate all slots and reset the generation number to avoid false matches.
>>>
>>> When we free a slot we store the seqid in the slot pointer so that it can
>>> be restored when we reactivate the slot.  The RFC can be read as
>>> suggesting that the slot number could restart from one after a slot is
>>> retired and reactivated, but also suggests that retiring slots is not
>>> required.  So when we reactive a slot we accept with the next seqid in
>>> sequence, or 1.
>>>
>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>> matches how it is encoded for the reply.
>>>
>>> se_dead is moved in struct nfsd4_session to remove a hole.
>>>
>>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>> ---
>>>    fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
>>>    fs/nfsd/nfs4xdr.c   |  5 ++-
>>>    fs/nfsd/state.h     |  6 ++-
>>>    fs/nfsd/xdr4.h      |  2 -
>>>    4 files changed, 92 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index fd9473d487f3..a2d1f97b8a0e 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
>>>    #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>    
>>>    static void
>>> -free_session_slots(struct nfsd4_session *ses)
>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>    {
>>>    	int i;
>>>    
>>> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +	if (from >= ses->se_fchannel.maxreqs)
>>> +		return;
>>> +
>>> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>    		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>    
>>> -		xa_erase(&ses->se_slots, i);
>>> +		/*
>>> +		 * Save the seqid in case we reactivate this slot.
>>> +		 * This will never require a memory allocation so GFP
>>> +		 * flag is irrelevant
>>> +		 */
>>> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>>>    		free_svc_cred(&slot->sl_cred);
>>>    		kfree(slot);
>>>    	}
>>> +	ses->se_fchannel.maxreqs = from;
>>> +	if (ses->se_target_maxslots > from)
>>> +		ses->se_target_maxslots = from;
>>> +}
>>> +
>>> +/**
>>> + * reduce_session_slots - reduce the target max-slots of a session if possible
>>> + * @ses:  The session to affect
>>> + * @dec:  how much to decrease the target by
>>> + *
>>> + * This interface can be used by a shrinker to reduce the target max-slots
>>> + * for a session so that some slots can eventually be freed.
>>> + * It uses spin_trylock() as it may be called in a context where another
>>> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
>>> + * best-effort, skiping a session is client_lock is already held has no
>>> + * great coast
>>> + *
>>> + * Return value:
>>> + *   The number of slots that the target was reduced by.
>>> + */
>>> +static int __maybe_unused
>>> +reduce_session_slots(struct nfsd4_session *ses, int dec)
>>> +{
>>> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
>>> +					  nfsd_net_id);
>>> +	int ret = 0;
>>> +
>>> +	if (ses->se_target_maxslots <= 1)
>>> +		return ret;
>>> +	if (!spin_trylock(&nn->client_lock))
>>> +		return ret;
>>> +	ret = min(dec, ses->se_target_maxslots-1);
>>> +	ses->se_target_maxslots -= ret;
>>> +	ses->se_slot_gen += 1;
>>> +	if (ses->se_slot_gen == 0) {
>>> +		int i;
>>> +		ses->se_slot_gen = 1;
>>> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>> +			slot->sl_generation = 0;
>>> +		}
>>> +	}
>>> +	spin_unlock(&nn->client_lock);
>>> +	return ret;
>>>    }
>>>    
>>>    /*
>>> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>>>    	}
>>>    	fattrs->maxreqs = i;
>>>    	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>>> +	new->se_target_maxslots = i;
>>>    	new->se_cb_slot_avail = ~0U;
>>>    	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>>>    				      NFSD_BC_SLOT_TABLE_SIZE - 1);
>>> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>>>    
>>>    static void __free_session(struct nfsd4_session *ses)
>>>    {
>>> -	free_session_slots(ses);
>>> +	free_session_slots(ses, 0);
>>>    	xa_destroy(&ses->se_slots);
>>>    	kfree(ses);
>>>    }
>>> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
>>>    	seq_printf(m, "session slots:");
>>>    	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>>    		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
>>> +	seq_printf(m, "\nsession target slots:");
>>> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>> +		seq_printf(m, " %u", ses->se_target_maxslots);
>>>    	spin_unlock(&clp->cl_lock);
>>>    	seq_puts(m, "\n");
>>>    
>>> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>>>    	kfree(exid->server_impl_name);
>>>    }
>>>    
>>> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>>>    {
>>>    	/* The slot is in use, and no response has been sent. */
>>> -	if (slot_inuse) {
>>> +	if (flags & NFSD4_SLOT_INUSE) {
>>>    		if (seqid == slot_seqid)
>>>    			return nfserr_jukebox;
>>>    		else
>>> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>>    	/* Note unsigned 32-bit arithmetic handles wraparound: */
>>>    	if (likely(seqid == slot_seqid + 1))
>>>    		return nfs_ok;
>>> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
>>> +		return nfs_ok;
>>>    	if (seqid == slot_seqid)
>>>    		return nfserr_replay_cache;
>>>    	return nfserr_seq_misordered;
>>> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>>>    
>>>    	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>>> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>>> -					slot->sl_flags & NFSD4_SLOT_INUSE);
>>> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>>>    	if (status == nfserr_replay_cache) {
>>>    		status = nfserr_seq_misordered;
>>>    		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
>>> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	if (status)
>>>    		goto out_put_session;
>>>    
>>> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
>>> +	    slot->sl_generation == session->se_slot_gen &&
>>> +	    seq->maxslots <= session->se_target_maxslots)
>>> +		/* Client acknowledged our reduce maxreqs */
>>> +		free_session_slots(session, session->se_target_maxslots);
>>> +
>>>    	buflen = (seq->cachethis) ?
>>>    			session->se_fchannel.maxresp_cached :
>>>    			session->se_fchannel.maxresp_sz;
>>> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	svc_reserve(rqstp, buflen);
>>>    
>>>    	status = nfs_ok;
>>> -	/* Success! bump slot seqid */
>>> +	/* Success! accept new slot seqid */
>>>    	slot->sl_seqid = seq->seqid;
>>> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>>>    	slot->sl_flags |= NFSD4_SLOT_INUSE;
>>> +	slot->sl_generation = session->se_slot_gen;
>>>    	if (seq->cachethis)
>>>    		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>>>    	else
>>> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	 * the client might use.
>>>    	 */
>>>    	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
>>> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>>>    	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>>>    		int s = session->se_fchannel.maxreqs;
>>>    		int cnt = DIV_ROUND_UP(s, 5);
>>> +		void *prev_slot;
>>>    
>>>    		do {
>>>    			/*
>>> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    			 */
>>>    			slot = kzalloc(slot_bytes(&session->se_fchannel),
>>>    				       GFP_NOWAIT);
>>> +			prev_slot = xa_load(&session->se_slots, s);
>>> +			if (xa_is_value(prev_slot) && slot) {
>>> +				slot->sl_seqid = xa_to_value(prev_slot);
>>> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
>>> +			}
>>>    			if (slot &&
>>>    			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>>>    						GFP_NOWAIT))) {
>>>    				s += 1;
>>>    				session->se_fchannel.maxreqs = s;
>>> +				session->se_target_maxslots = s;
>>>    			} else {
>>>    				kfree(slot);
>>>    				slot = NULL;
>>>    			}
>>>    		} while (slot && --cnt > 0);
>>>    	}
>>> -	seq->maxslots = session->se_fchannel.maxreqs;
>>> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
>>> +	seq->target_maxslots = session->se_target_maxslots;
>>>    
>>>    out:
>>>    	switch (clp->cl_cb_state) {
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 53fac037611c..4dcb03cd9292 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>>>    		return nfserr_bad_xdr;
>>>    	seq->seqid = be32_to_cpup(p++);
>>>    	seq->slotid = be32_to_cpup(p++);
>>> -	seq->maxslots = be32_to_cpup(p++);
>>> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
>>> +	seq->maxslots = be32_to_cpup(p++) + 1;
>>>    	seq->cachethis = be32_to_cpup(p);
>>>    
>>>    	seq->status_flags = 0;
>>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_target_highest_slotid */
>>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_status_flags */
>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>> index aad547d3ad8b..4251ff3c5ad1 100644
>>> --- a/fs/nfsd/state.h
>>> +++ b/fs/nfsd/state.h
>>> @@ -245,10 +245,12 @@ struct nfsd4_slot {
>>>    	struct svc_cred sl_cred;
>>>    	u32	sl_datalen;
>>>    	u16	sl_opcnt;
>>> +	u16	sl_generation;
>>>    #define NFSD4_SLOT_INUSE	(1 << 0)
>>>    #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>>>    #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>>>    #define NFSD4_SLOT_CACHED	(1 << 3)
>>> +#define NFSD4_SLOT_REUSED	(1 << 4)
>>>    	u8	sl_flags;
>>>    	char	sl_data[];
>>>    };
>>> @@ -321,7 +323,6 @@ struct nfsd4_session {
>>>    	u32			se_cb_slot_avail; /* bitmap of available slots */
>>>    	u32			se_cb_highest_slot;	/* highest slot client wants */
>>>    	u32			se_cb_prog;
>>> -	bool			se_dead;
>>>    	struct list_head	se_hash;	/* hash by sessionid */
>>>    	struct list_head	se_perclnt;
>>>    	struct nfs4_client	*se_client;
>>> @@ -331,6 +332,9 @@ struct nfsd4_session {
>>>    	struct list_head	se_conns;
>>>    	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>>>    	struct xarray		se_slots;	/* forward channel slots */
>>> +	u16			se_slot_gen;
>>> +	bool			se_dead;
>>> +	u32			se_target_maxslots;
>>>    };
>>>    
>>>    /* formatted contents of nfs4_sessionid */
>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>> index 382cc1389396..c26ba86dbdfd 100644
>>> --- a/fs/nfsd/xdr4.h
>>> +++ b/fs/nfsd/xdr4.h
>>> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>>>    	u32			slotid;			/* request/response */
>>>    	u32			maxslots;		/* request/response */
>>>    	u32			cachethis;		/* request */
>>> -#if 0
>>>    	u32			target_maxslots;	/* response */
>>> -#endif /* not yet */
>>>    	u32			status_flags;		/* response */
>>>    };
>>>    
>>
>> Hi Neil -
>>
>> I've found some misbehavior which I've bisected to this commit.
>>
>> If disconnect injection is set up to break the connection every 25,000
>> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
>> after this commit is applied.
>>
>> Network capture shows that the server eventually starts returning
>> SEQ_MISORDERED because the client has forgotten an retired slot after a
>> disconnect, and tries to use sequence number 1 for that slot with a new
>> operation.
>>
>> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
>> client. This function uses a bit of calculus to decide when to bump the
>> slot table's generation number. In the failing case, it never bumps the
>> generation, and that results in the client freeing slots it shouldn't.
>> The server's reported { highest, target_highest } slot numbers don't
>> appear to change correctly after the client has reconnected.
>>
>> If I revert this hunk from 5/6:
>>
>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres
>> *resp, __be32 nfserr,
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_target_highest_slotid */
>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_status_flags */
>>
>> the reproducer above runs to completion in the expected amount of time.
>>
>>
>> The high order bit here is whether I should drop these patches for
>> v6.14, or whether you believe you can come up with a narrow solution
>> during the early part of v6.14-rc that I can include in an -rc update
>> for NFSD. I can't really tell if a significant amount of surgery will
>> be necessary.
>>
>> What do you think?
> 
> I think I can fix it.
> 
> It sounds like it might be a client bug, or it might be a difference in
> interpretation of the spec, or it might be an ambiguity in the spec.

Perhaps, but consider that the Linux NFS client interoperates
successfully with v6.13 NFSD, Solaris's NFSv4.1 implementation,
NetApp's implementation, and Hammerspace, and has done for many
years.

I agree that the spec language isn't particularly transparent, but
somehow all of these implementations have interpreted it the same
way.


> But I think I can fix it by setting NFSD_SLOT_REUSED in any slots
> beyond ->target_maxslots when I reduce target_maxslots.  That makes it
> so that we accept either 1 or the next-in-sequence seqid.
> Doing that does open up a possible risk of a resend being accepted as a
> new request.  As it is a new connection, resends are a real possibility.
> I'll have to ponder that a bit more and might need to be careful about
> retiring slots with a low seqid which have been used recently.  Maybe I
> need to store the time when seqid 1 was used, and not return a slot
> until some minimum time after that timestamp.

I gathered additional information using a trace_printk() in the client's
decode_sequence() function.

In the working case, after reconnect, the server SEQUENCE response has

   sr_highest_slotid = 18
   sr_target_highest_slot = 18

IIUC 18 is the slot number of the highest slot currently in use.

In the stalling case, after reconnect, the server SEQUENCE response has

   sr_highest_slotid = 23
   sr_target_highest_slot = 63

The client heuristics decide to free slots 23 and higher, even though
one or more of those slots have been active and have sequence numbers
larger than 1.

I can provide more detailed reproducer instructions if you wish.


> I'll try to get you a patch before the end of next week.

Thanks!


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-19  2:01   ` Chuck Lever
  2025-01-21  2:36     ` NeilBrown
@ 2025-01-27  4:08     ` NeilBrown
  2025-01-27 13:57       ` Chuck Lever
  1 sibling, 1 reply; 33+ messages in thread
From: NeilBrown @ 2025-01-27  4:08 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Sun, 19 Jan 2025, Chuck Lever wrote:
> On 12/11/24 4:47 PM, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than we limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use an 16 bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactive a slot we accept with the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > se_dead is moved in struct nfsd4_session to remove a hole.
> > 
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >   fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
> >   fs/nfsd/nfs4xdr.c   |  5 ++-
> >   fs/nfsd/state.h     |  6 ++-
> >   fs/nfsd/xdr4.h      |  2 -
> >   4 files changed, 92 insertions(+), 15 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fd9473d487f3..a2d1f97b8a0e 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
> >   #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >   
> >   static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >   {
> >   	int i;
> >   
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >   		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >   
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> >   		free_svc_cred(&slot->sl_cred);
> >   		kfree(slot);
> >   	}
> > +	ses->se_fchannel.maxreqs = from;
> > +	if (ses->se_target_maxslots > from)
> > +		ses->se_target_maxslots = from;
> > +}
> > +
> > +/**
> > + * reduce_session_slots - reduce the target max-slots of a session if possible
> > + * @ses:  The session to affect
> > + * @dec:  how much to decrease the target by
> > + *
> > + * This interface can be used by a shrinker to reduce the target max-slots
> > + * for a session so that some slots can eventually be freed.
> > + * It uses spin_trylock() as it may be called in a context where another
> > + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> > + * best-effort, skiping a session is client_lock is already held has no
> > + * great coast
> > + *
> > + * Return value:
> > + *   The number of slots that the target was reduced by.
> > + */
> > +static int __maybe_unused
> > +reduce_session_slots(struct nfsd4_session *ses, int dec)
> > +{
> > +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> > +					  nfsd_net_id);
> > +	int ret = 0;
> > +
> > +	if (ses->se_target_maxslots <= 1)
> > +		return ret;
> > +	if (!spin_trylock(&nn->client_lock))
> > +		return ret;
> > +	ret = min(dec, ses->se_target_maxslots-1);
> > +	ses->se_target_maxslots -= ret;
> > +	ses->se_slot_gen += 1;
> > +	if (ses->se_slot_gen == 0) {
> > +		int i;
> > +		ses->se_slot_gen = 1;
> > +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > +			slot->sl_generation = 0;
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	return ret;
> >   }
> >   
> >   /*
> > @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >   	}
> >   	fattrs->maxreqs = i;
> >   	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> > +	new->se_target_maxslots = i;
> >   	new->se_cb_slot_avail = ~0U;
> >   	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
> >   				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> > @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
> >   
> >   static void __free_session(struct nfsd4_session *ses)
> >   {
> > -	free_session_slots(ses);
> > +	free_session_slots(ses, 0);
> >   	xa_destroy(&ses->se_slots);
> >   	kfree(ses);
> >   }
> > @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
> >   	seq_printf(m, "session slots:");
> >   	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >   		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > +	seq_printf(m, "\nsession target slots:");
> > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > +		seq_printf(m, " %u", ses->se_target_maxslots);
> >   	spin_unlock(&clp->cl_lock);
> >   	seq_puts(m, "\n");
> >   
> > @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
> >   	kfree(exid->server_impl_name);
> >   }
> >   
> > -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> > +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
> >   {
> >   	/* The slot is in use, and no response has been sent. */
> > -	if (slot_inuse) {
> > +	if (flags & NFSD4_SLOT_INUSE) {
> >   		if (seqid == slot_seqid)
> >   			return nfserr_jukebox;
> >   		else
> > @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >   	/* Note unsigned 32-bit arithmetic handles wraparound: */
> >   	if (likely(seqid == slot_seqid + 1))
> >   		return nfs_ok;
> > +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> > +		return nfs_ok;
> >   	if (seqid == slot_seqid)
> >   		return nfserr_replay_cache;
> >   	return nfserr_seq_misordered;
> > @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >   
> >   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> > -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> > -					slot->sl_flags & NFSD4_SLOT_INUSE);
> > +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
> >   	if (status == nfserr_replay_cache) {
> >   		status = nfserr_seq_misordered;
> >   		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> > @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	if (status)
> >   		goto out_put_session;
> >   
> > +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> > +	    slot->sl_generation == session->se_slot_gen &&
> > +	    seq->maxslots <= session->se_target_maxslots)
> > +		/* Client acknowledged our reduce maxreqs */
> > +		free_session_slots(session, session->se_target_maxslots);
> > +
> >   	buflen = (seq->cachethis) ?
> >   			session->se_fchannel.maxresp_cached :
> >   			session->se_fchannel.maxresp_sz;
> > @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	svc_reserve(rqstp, buflen);
> >   
> >   	status = nfs_ok;
> > -	/* Success! bump slot seqid */
> > +	/* Success! accept new slot seqid */
> >   	slot->sl_seqid = seq->seqid;
> > +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
> >   	slot->sl_flags |= NFSD4_SLOT_INUSE;
> > +	slot->sl_generation = session->se_slot_gen;
> >   	if (seq->cachethis)
> >   		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> >   	else
> > @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	 * the client might use.
> >   	 */
> >   	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
> >   	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> >   		int s = session->se_fchannel.maxreqs;
> >   		int cnt = DIV_ROUND_UP(s, 5);
> > +		void *prev_slot;
> >   
> >   		do {
> >   			/*
> > @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   			 */
> >   			slot = kzalloc(slot_bytes(&session->se_fchannel),
> >   				       GFP_NOWAIT);
> > +			prev_slot = xa_load(&session->se_slots, s);
> > +			if (xa_is_value(prev_slot) && slot) {
> > +				slot->sl_seqid = xa_to_value(prev_slot);
> > +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> > +			}
> >   			if (slot &&
> >   			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> >   						GFP_NOWAIT))) {
> >   				s += 1;
> >   				session->se_fchannel.maxreqs = s;
> > +				session->se_target_maxslots = s;
> >   			} else {
> >   				kfree(slot);
> >   				slot = NULL;
> >   			}
> >   		} while (slot && --cnt > 0);
> >   	}
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> > +	seq->target_maxslots = session->se_target_maxslots;
> >   
> >   out:
> >   	switch (clp->cl_cb_state) {
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 53fac037611c..4dcb03cd9292 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> >   		return nfserr_bad_xdr;
> >   	seq->seqid = be32_to_cpup(p++);
> >   	seq->slotid = be32_to_cpup(p++);
> > -	seq->maxslots = be32_to_cpup(p++);
> > +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> > +	seq->maxslots = be32_to_cpup(p++) + 1;
> >   	seq->cachethis = be32_to_cpup(p);
> >   
> >   	seq->status_flags = 0;
> > @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_target_highest_slotid */
> > -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> > +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_status_flags */
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index aad547d3ad8b..4251ff3c5ad1 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -245,10 +245,12 @@ struct nfsd4_slot {
> >   	struct svc_cred sl_cred;
> >   	u32	sl_datalen;
> >   	u16	sl_opcnt;
> > +	u16	sl_generation;
> >   #define NFSD4_SLOT_INUSE	(1 << 0)
> >   #define NFSD4_SLOT_CACHETHIS	(1 << 1)
> >   #define NFSD4_SLOT_INITIALIZED	(1 << 2)
> >   #define NFSD4_SLOT_CACHED	(1 << 3)
> > +#define NFSD4_SLOT_REUSED	(1 << 4)
> >   	u8	sl_flags;
> >   	char	sl_data[];
> >   };
> > @@ -321,7 +323,6 @@ struct nfsd4_session {
> >   	u32			se_cb_slot_avail; /* bitmap of available slots */
> >   	u32			se_cb_highest_slot;	/* highest slot client wants */
> >   	u32			se_cb_prog;
> > -	bool			se_dead;
> >   	struct list_head	se_hash;	/* hash by sessionid */
> >   	struct list_head	se_perclnt;
> >   	struct nfs4_client	*se_client;
> > @@ -331,6 +332,9 @@ struct nfsd4_session {
> >   	struct list_head	se_conns;
> >   	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
> >   	struct xarray		se_slots;	/* forward channel slots */
> > +	u16			se_slot_gen;
> > +	bool			se_dead;
> > +	u32			se_target_maxslots;
> >   };
> >   
> >   /* formatted contents of nfs4_sessionid */
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 382cc1389396..c26ba86dbdfd 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >   	u32			slotid;			/* request/response */
> >   	u32			maxslots;		/* request/response */
> >   	u32			cachethis;		/* request */
> > -#if 0
> >   	u32			target_maxslots;	/* response */
> > -#endif /* not yet */
> >   	u32			status_flags;		/* response */
> >   };
> >   
> 
> Hi Neil -
> 
> I've found some misbehavior which I've bisected to this commit.

Hi Chuck,
 could you please confirm that it really was this commit that you
 bisected to?  Not the next one?
 Because this commit never reduces ->se_target_maxslots, so the
 patch which you say removed the symptom should be a no-op.

 Even if it was the next commit I'm struggling to pin down the
 problem.  Here is my current analysis - partly to ensure I can present
 it clearly.

 The evidence suggests that the client has retired a slot that the
 server hasn't.  This happens when nfs41_set_server_slotid_locked()
 calls nfsd4_shrink_slot_table(), and nothing will happen if any slots
 before the new limit are still in use.  If the server reduces
 its idea of the target when the client isn't even using that many,
 the slots can be freed immediately that the client gets a reply
 indicating the new highest_slot number from the server.

 The server will not free these slots immediately but will wait to get a
 confirmation from the client that it has accepted the new limit.  But,
 importantly, the server will not increase the limit that it sends to
 the client until after it has has a chance to free the retired slots.
 If the server doesn't increase the limit, then the client won't try to
 use the retired slots...

 Do you still have the network trace which chows the error?  Would I be
 able to look at it?

Thanks,
NeilBrown


 

 

> 
> If disconnect injection is set up to break the connection every 25,000
> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
> after this commit is applied.
> 
> Network capture shows that the server eventually starts returning
> SEQ_MISORDERED because the client has forgotten an retired slot after a
> disconnect, and tries to use sequence number 1 for that slot with a new
> operation.
> 
> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
> client. This function uses a bit of calculus to decide when to bump the
> slot table's generation number. In the failing case, it never bumps the
> generation, and that results in the client freeing slots it shouldn't.
> The server's reported { highest, target_highest } slot numbers don't
> appear to change correctly after the client has reconnected.
> 
> If I revert this hunk from 5/6:
> 
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres 
> *resp, __be32 nfserr,
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_status_flags */
> 
> the reproducer above runs to completion in the expected amount of time.
> 
> 
> The high order bit here is whether I should drop these patches for
> v6.14, or whether you believe you can come up with a narrow solution
> during the early part of v6.14-rc that I can include in an -rc update
> for NFSD. I can't really tell if a significant amount of surgery will
> be necessary.
> 
> What do you think?
> 
> 
> -- 
> Chuck Lever
> 


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-27  4:08     ` NeilBrown
@ 2025-01-27 13:57       ` Chuck Lever
  2025-01-27 22:57         ` NeilBrown
  0 siblings, 1 reply; 33+ messages in thread
From: Chuck Lever @ 2025-01-27 13:57 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 1/26/25 11:08 PM, NeilBrown wrote:
> On Sun, 19 Jan 2025, Chuck Lever wrote:
>> On 12/11/24 4:47 PM, NeilBrown wrote:
>>> Reducing the number of slots in the session slot table requires
>>> confirmation from the client.  This patch adds reduce_session_slots()
>>> which starts the process of getting confirmation, but never calls it.
>>> That will come in a later patch.
>>>
>>> Before we can free a slot we need to confirm that the client won't try
>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>> is not larger than we limit we are trying to impose.  So for each slot
>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>
>>> To achieve this we introduce a concept of request "generations".  Each
>>> time we decide to reduce cr_maxrequests we increment the generation
>>> number, and record this when we return the lower cr_maxrequests to the
>>> client.  When a slot with the current generation reports a low
>>> ca_maxrequests, we commit to that level and free extra slots.
>>>
>>> We use an 16 bit generation number (64 seems wasteful) and if it cycles
>>> we iterate all slots and reset the generation number to avoid false matches.
>>>
>>> When we free a slot we store the seqid in the slot pointer so that it can
>>> be restored when we reactivate the slot.  The RFC can be read as
>>> suggesting that the slot number could restart from one after a slot is
>>> retired and reactivated, but also suggests that retiring slots is not
>>> required.  So when we reactive a slot we accept with the next seqid in
>>> sequence, or 1.
>>>
>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>> matches how it is encoded for the reply.
>>>
>>> se_dead is moved in struct nfsd4_session to remove a hole.
>>>
>>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>> ---
>>>    fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
>>>    fs/nfsd/nfs4xdr.c   |  5 ++-
>>>    fs/nfsd/state.h     |  6 ++-
>>>    fs/nfsd/xdr4.h      |  2 -
>>>    4 files changed, 92 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index fd9473d487f3..a2d1f97b8a0e 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
>>>    #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>    
>>>    static void
>>> -free_session_slots(struct nfsd4_session *ses)
>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>    {
>>>    	int i;
>>>    
>>> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +	if (from >= ses->se_fchannel.maxreqs)
>>> +		return;
>>> +
>>> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>    		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>    
>>> -		xa_erase(&ses->se_slots, i);
>>> +		/*
>>> +		 * Save the seqid in case we reactivate this slot.
>>> +		 * This will never require a memory allocation so GFP
>>> +		 * flag is irrelevant
>>> +		 */
>>> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>>>    		free_svc_cred(&slot->sl_cred);
>>>    		kfree(slot);
>>>    	}
>>> +	ses->se_fchannel.maxreqs = from;
>>> +	if (ses->se_target_maxslots > from)
>>> +		ses->se_target_maxslots = from;
>>> +}
>>> +
>>> +/**
>>> + * reduce_session_slots - reduce the target max-slots of a session if possible
>>> + * @ses:  The session to affect
>>> + * @dec:  how much to decrease the target by
>>> + *
>>> + * This interface can be used by a shrinker to reduce the target max-slots
>>> + * for a session so that some slots can eventually be freed.
>>> + * It uses spin_trylock() as it may be called in a context where another
>>> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
>>> + * best-effort, skiping a session is client_lock is already held has no
>>> + * great coast
>>> + *
>>> + * Return value:
>>> + *   The number of slots that the target was reduced by.
>>> + */
>>> +static int __maybe_unused
>>> +reduce_session_slots(struct nfsd4_session *ses, int dec)
>>> +{
>>> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
>>> +					  nfsd_net_id);
>>> +	int ret = 0;
>>> +
>>> +	if (ses->se_target_maxslots <= 1)
>>> +		return ret;
>>> +	if (!spin_trylock(&nn->client_lock))
>>> +		return ret;
>>> +	ret = min(dec, ses->se_target_maxslots-1);
>>> +	ses->se_target_maxslots -= ret;
>>> +	ses->se_slot_gen += 1;
>>> +	if (ses->se_slot_gen == 0) {
>>> +		int i;
>>> +		ses->se_slot_gen = 1;
>>> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>> +			slot->sl_generation = 0;
>>> +		}
>>> +	}
>>> +	spin_unlock(&nn->client_lock);
>>> +	return ret;
>>>    }
>>>    
>>>    /*
>>> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>>>    	}
>>>    	fattrs->maxreqs = i;
>>>    	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>>> +	new->se_target_maxslots = i;
>>>    	new->se_cb_slot_avail = ~0U;
>>>    	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>>>    				      NFSD_BC_SLOT_TABLE_SIZE - 1);
>>> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>>>    
>>>    static void __free_session(struct nfsd4_session *ses)
>>>    {
>>> -	free_session_slots(ses);
>>> +	free_session_slots(ses, 0);
>>>    	xa_destroy(&ses->se_slots);
>>>    	kfree(ses);
>>>    }
>>> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
>>>    	seq_printf(m, "session slots:");
>>>    	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>>    		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
>>> +	seq_printf(m, "\nsession target slots:");
>>> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>> +		seq_printf(m, " %u", ses->se_target_maxslots);
>>>    	spin_unlock(&clp->cl_lock);
>>>    	seq_puts(m, "\n");
>>>    
>>> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>>>    	kfree(exid->server_impl_name);
>>>    }
>>>    
>>> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>>>    {
>>>    	/* The slot is in use, and no response has been sent. */
>>> -	if (slot_inuse) {
>>> +	if (flags & NFSD4_SLOT_INUSE) {
>>>    		if (seqid == slot_seqid)
>>>    			return nfserr_jukebox;
>>>    		else
>>> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>>    	/* Note unsigned 32-bit arithmetic handles wraparound: */
>>>    	if (likely(seqid == slot_seqid + 1))
>>>    		return nfs_ok;
>>> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
>>> +		return nfs_ok;
>>>    	if (seqid == slot_seqid)
>>>    		return nfserr_replay_cache;
>>>    	return nfserr_seq_misordered;
>>> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>>>    
>>>    	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>>> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>>> -					slot->sl_flags & NFSD4_SLOT_INUSE);
>>> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>>>    	if (status == nfserr_replay_cache) {
>>>    		status = nfserr_seq_misordered;
>>>    		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
>>> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	if (status)
>>>    		goto out_put_session;
>>>    
>>> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
>>> +	    slot->sl_generation == session->se_slot_gen &&
>>> +	    seq->maxslots <= session->se_target_maxslots)
>>> +		/* Client acknowledged our reduce maxreqs */
>>> +		free_session_slots(session, session->se_target_maxslots);
>>> +
>>>    	buflen = (seq->cachethis) ?
>>>    			session->se_fchannel.maxresp_cached :
>>>    			session->se_fchannel.maxresp_sz;
>>> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	svc_reserve(rqstp, buflen);
>>>    
>>>    	status = nfs_ok;
>>> -	/* Success! bump slot seqid */
>>> +	/* Success! accept new slot seqid */
>>>    	slot->sl_seqid = seq->seqid;
>>> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>>>    	slot->sl_flags |= NFSD4_SLOT_INUSE;
>>> +	slot->sl_generation = session->se_slot_gen;
>>>    	if (seq->cachethis)
>>>    		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>>>    	else
>>> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	 * the client might use.
>>>    	 */
>>>    	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
>>> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>>>    	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>>>    		int s = session->se_fchannel.maxreqs;
>>>    		int cnt = DIV_ROUND_UP(s, 5);
>>> +		void *prev_slot;
>>>    
>>>    		do {
>>>    			/*
>>> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    			 */
>>>    			slot = kzalloc(slot_bytes(&session->se_fchannel),
>>>    				       GFP_NOWAIT);
>>> +			prev_slot = xa_load(&session->se_slots, s);
>>> +			if (xa_is_value(prev_slot) && slot) {
>>> +				slot->sl_seqid = xa_to_value(prev_slot);
>>> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
>>> +			}
>>>    			if (slot &&
>>>    			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>>>    						GFP_NOWAIT))) {
>>>    				s += 1;
>>>    				session->se_fchannel.maxreqs = s;
>>> +				session->se_target_maxslots = s;
>>>    			} else {
>>>    				kfree(slot);
>>>    				slot = NULL;
>>>    			}
>>>    		} while (slot && --cnt > 0);
>>>    	}
>>> -	seq->maxslots = session->se_fchannel.maxreqs;
>>> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
>>> +	seq->target_maxslots = session->se_target_maxslots;
>>>    
>>>    out:
>>>    	switch (clp->cl_cb_state) {
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 53fac037611c..4dcb03cd9292 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>>>    		return nfserr_bad_xdr;
>>>    	seq->seqid = be32_to_cpup(p++);
>>>    	seq->slotid = be32_to_cpup(p++);
>>> -	seq->maxslots = be32_to_cpup(p++);
>>> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
>>> +	seq->maxslots = be32_to_cpup(p++) + 1;
>>>    	seq->cachethis = be32_to_cpup(p);
>>>    
>>>    	seq->status_flags = 0;
>>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_target_highest_slotid */
>>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_status_flags */
>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>> index aad547d3ad8b..4251ff3c5ad1 100644
>>> --- a/fs/nfsd/state.h
>>> +++ b/fs/nfsd/state.h
>>> @@ -245,10 +245,12 @@ struct nfsd4_slot {
>>>    	struct svc_cred sl_cred;
>>>    	u32	sl_datalen;
>>>    	u16	sl_opcnt;
>>> +	u16	sl_generation;
>>>    #define NFSD4_SLOT_INUSE	(1 << 0)
>>>    #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>>>    #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>>>    #define NFSD4_SLOT_CACHED	(1 << 3)
>>> +#define NFSD4_SLOT_REUSED	(1 << 4)
>>>    	u8	sl_flags;
>>>    	char	sl_data[];
>>>    };
>>> @@ -321,7 +323,6 @@ struct nfsd4_session {
>>>    	u32			se_cb_slot_avail; /* bitmap of available slots */
>>>    	u32			se_cb_highest_slot;	/* highest slot client wants */
>>>    	u32			se_cb_prog;
>>> -	bool			se_dead;
>>>    	struct list_head	se_hash;	/* hash by sessionid */
>>>    	struct list_head	se_perclnt;
>>>    	struct nfs4_client	*se_client;
>>> @@ -331,6 +332,9 @@ struct nfsd4_session {
>>>    	struct list_head	se_conns;
>>>    	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>>>    	struct xarray		se_slots;	/* forward channel slots */
>>> +	u16			se_slot_gen;
>>> +	bool			se_dead;
>>> +	u32			se_target_maxslots;
>>>    };
>>>    
>>>    /* formatted contents of nfs4_sessionid */
>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>> index 382cc1389396..c26ba86dbdfd 100644
>>> --- a/fs/nfsd/xdr4.h
>>> +++ b/fs/nfsd/xdr4.h
>>> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>>>    	u32			slotid;			/* request/response */
>>>    	u32			maxslots;		/* request/response */
>>>    	u32			cachethis;		/* request */
>>> -#if 0
>>>    	u32			target_maxslots;	/* response */
>>> -#endif /* not yet */
>>>    	u32			status_flags;		/* response */
>>>    };
>>>    
>>
>> Hi Neil -
>>
>> I've found some misbehavior which I've bisected to this commit.
> 
> Hi Chuck,
>   could you please confirm that it really was this commit that you
>   bisected to?  Not the next one?

It's this one. I included the hunk that introduces the misbehavior
below. It's when the server starts returning a different value for
target_highest_slotid in the SEQUENCE result. The target_highest
is 63 -- the number the server has in its slot table. The maxslots
value is smaller.

In the working case, these two values never differ.


>   Because this commit never reduces ->se_target_maxslots, so the
>   patch which you say removed the symptom should be a no-op.

It's not the reducing of target_highest that's the problem. Rather
it's that the target_highest and max in-use slot IDs are different
values for a brief period after a reconnect.

That triggers the client to think that the server has reduced its
slot table size, so the client shrinks its slot table. The server has
not actually shrunken it, however, so it continues to expect the client
to use the large slot sequence numbers for those slots.

When the client starts to use one of those slots again, it uses a
sequence number of 1, and that fails.


>   Even if it was the next commit I'm struggling to pin down the
>   problem.  Here is my current analysis - partly to ensure I can present
>   it clearly.
> 
>   The evidence suggests that the client has retired a slot that the
>   server hasn't.  This happens when nfs41_set_server_slotid_locked()
>   calls nfsd4_shrink_slot_table(), and nothing will happen if any slots
>   before the new limit are still in use.  If the server reduces
>   its idea of the target when the client isn't even using that many,
>   the slots can be freed immediately that the client gets a reply
>   indicating the new highest_slot number from the server.
> 
>   The server will not free these slots immediately but will wait to get a
>   confirmation from the client that it has accepted the new limit.  But,
>   importantly, the server will not increase the limit that it sends to
>   the client until after it has has a chance to free the retired slots.
>   If the server doesn't increase the limit, then the client won't try to
>   use the retired slots...
> 
>   Do you still have the network trace which chows the error?  Would I be
>   able to look at it?

Sending via WeTransfer.

But also, it's easy enough to reproduce.

Build your server with CONFIG_FAIL_SUNRPC set. Reboot into the new
kernel.

Before each test, run this script on the server:

#!/usr/bin/bash

cd /sys/kernel/debug/fail_sunrpc/

echo Y > ignore-cache-wait
echo Y > ignore-client-disconnect
echo 24847 > interval
echo 97 > times
echo 100 > probability

exit 0

On the client, run fstests with an NFSv4.1 mount. It will usually hang
within the first 15 tests.


>> If disconnect injection is set up to break the connection every 25,000
>> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
>> after this commit is applied.
>>
>> Network capture shows that the server eventually starts returning
>> SEQ_MISORDERED because the client has forgotten an retired slot after a
>> disconnect, and tries to use sequence number 1 for that slot with a new
>> operation.
>>
>> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
>> client. This function uses a bit of calculus to decide when to bump the
>> slot table's generation number. In the failing case, it never bumps the
>> generation, and that results in the client freeing slots it shouldn't.
>> The server's reported { highest, target_highest } slot numbers don't
>> appear to change correctly after the client has reconnected.
>>
>> If I revert this hunk from 5/6:
>>
>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres
>> *resp, __be32 nfserr,
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_target_highest_slotid */
>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_status_flags */
>>
>> the reproducer above runs to completion in the expected amount of time.
>>
>>
>> The high order bit here is whether I should drop these patches for
>> v6.14, or whether you believe you can come up with a narrow solution
>> during the early part of v6.14-rc that I can include in an -rc update
>> for NFSD. I can't really tell if a significant amount of surgery will
>> be necessary.
>>
>> What do you think?
>>
>>
>> -- 
>> Chuck Lever
>>
> 


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-27 13:57       ` Chuck Lever
@ 2025-01-27 22:57         ` NeilBrown
  0 siblings, 0 replies; 33+ messages in thread
From: NeilBrown @ 2025-01-27 22:57 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Tue, 28 Jan 2025, Chuck Lever wrote:
> On 1/26/25 11:08 PM, NeilBrown wrote:
> > On Sun, 19 Jan 2025, Chuck Lever wrote:
> >> On 12/11/24 4:47 PM, NeilBrown wrote:
> >>> Reducing the number of slots in the session slot table requires
> >>> confirmation from the client.  This patch adds reduce_session_slots()
> >>> which starts the process of getting confirmation, but never calls it.
> >>> That will come in a later patch.
> >>>
> >>> Before we can free a slot we need to confirm that the client won't try
> >>> to use it again.  This involves returning a lower cr_maxrequests in a
> >>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> >>> is not larger than we limit we are trying to impose.  So for each slot
> >>> we need to remember that we have sent a reduced cr_maxrequests.
> >>>
> >>> To achieve this we introduce a concept of request "generations".  Each
> >>> time we decide to reduce cr_maxrequests we increment the generation
> >>> number, and record this when we return the lower cr_maxrequests to the
> >>> client.  When a slot with the current generation reports a low
> >>> ca_maxrequests, we commit to that level and free extra slots.
> >>>
> >>> We use an 16 bit generation number (64 seems wasteful) and if it cycles
> >>> we iterate all slots and reset the generation number to avoid false matches.
> >>>
> >>> When we free a slot we store the seqid in the slot pointer so that it can
> >>> be restored when we reactivate the slot.  The RFC can be read as
> >>> suggesting that the slot number could restart from one after a slot is
> >>> retired and reactivated, but also suggests that retiring slots is not
> >>> required.  So when we reactive a slot we accept with the next seqid in
> >>> sequence, or 1.
> >>>
> >>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> >>> matches how it is encoded for the reply.
> >>>
> >>> se_dead is moved in struct nfsd4_session to remove a hole.
> >>>
> >>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> >>> Signed-off-by: NeilBrown <neilb@suse.de>
> >>> ---
> >>>    fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
> >>>    fs/nfsd/nfs4xdr.c   |  5 ++-
> >>>    fs/nfsd/state.h     |  6 ++-
> >>>    fs/nfsd/xdr4.h      |  2 -
> >>>    4 files changed, 92 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >>> index fd9473d487f3..a2d1f97b8a0e 100644
> >>> --- a/fs/nfsd/nfs4state.c
> >>> +++ b/fs/nfsd/nfs4state.c
> >>> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
> >>>    #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >>>    
> >>>    static void
> >>> -free_session_slots(struct nfsd4_session *ses)
> >>> +free_session_slots(struct nfsd4_session *ses, int from)
> >>>    {
> >>>    	int i;
> >>>    
> >>> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> >>> +	if (from >= ses->se_fchannel.maxreqs)
> >>> +		return;
> >>> +
> >>> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >>>    		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >>>    
> >>> -		xa_erase(&ses->se_slots, i);
> >>> +		/*
> >>> +		 * Save the seqid in case we reactivate this slot.
> >>> +		 * This will never require a memory allocation so GFP
> >>> +		 * flag is irrelevant
> >>> +		 */
> >>> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> >>>    		free_svc_cred(&slot->sl_cred);
> >>>    		kfree(slot);
> >>>    	}
> >>> +	ses->se_fchannel.maxreqs = from;
> >>> +	if (ses->se_target_maxslots > from)
> >>> +		ses->se_target_maxslots = from;
> >>> +}
> >>> +
> >>> +/**
> >>> + * reduce_session_slots - reduce the target max-slots of a session if possible
> >>> + * @ses:  The session to affect
> >>> + * @dec:  how much to decrease the target by
> >>> + *
> >>> + * This interface can be used by a shrinker to reduce the target max-slots
> >>> + * for a session so that some slots can eventually be freed.
> >>> + * It uses spin_trylock() as it may be called in a context where another
> >>> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> >>> + * best-effort, skiping a session is client_lock is already held has no
> >>> + * great coast
> >>> + *
> >>> + * Return value:
> >>> + *   The number of slots that the target was reduced by.
> >>> + */
> >>> +static int __maybe_unused
> >>> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> >>> +{
> >>> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> >>> +					  nfsd_net_id);
> >>> +	int ret = 0;
> >>> +
> >>> +	if (ses->se_target_maxslots <= 1)
> >>> +		return ret;
> >>> +	if (!spin_trylock(&nn->client_lock))
> >>> +		return ret;
> >>> +	ret = min(dec, ses->se_target_maxslots-1);
> >>> +	ses->se_target_maxslots -= ret;
> >>> +	ses->se_slot_gen += 1;
> >>> +	if (ses->se_slot_gen == 0) {
> >>> +		int i;
> >>> +		ses->se_slot_gen = 1;
> >>> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> >>> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >>> +			slot->sl_generation = 0;
> >>> +		}
> >>> +	}
> >>> +	spin_unlock(&nn->client_lock);
> >>> +	return ret;
> >>>    }
> >>>    
> >>>    /*
> >>> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >>>    	}
> >>>    	fattrs->maxreqs = i;
> >>>    	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> >>> +	new->se_target_maxslots = i;
> >>>    	new->se_cb_slot_avail = ~0U;
> >>>    	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
> >>>    				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> >>> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
> >>>    
> >>>    static void __free_session(struct nfsd4_session *ses)
> >>>    {
> >>> -	free_session_slots(ses);
> >>> +	free_session_slots(ses, 0);
> >>>    	xa_destroy(&ses->se_slots);
> >>>    	kfree(ses);
> >>>    }
> >>> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
> >>>    	seq_printf(m, "session slots:");
> >>>    	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >>>    		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> >>> +	seq_printf(m, "\nsession target slots:");
> >>> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >>> +		seq_printf(m, " %u", ses->se_target_maxslots);
> >>>    	spin_unlock(&clp->cl_lock);
> >>>    	seq_puts(m, "\n");
> >>>    
> >>> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
> >>>    	kfree(exid->server_impl_name);
> >>>    }
> >>>    
> >>> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >>> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
> >>>    {
> >>>    	/* The slot is in use, and no response has been sent. */
> >>> -	if (slot_inuse) {
> >>> +	if (flags & NFSD4_SLOT_INUSE) {
> >>>    		if (seqid == slot_seqid)
> >>>    			return nfserr_jukebox;
> >>>    		else
> >>> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >>>    	/* Note unsigned 32-bit arithmetic handles wraparound: */
> >>>    	if (likely(seqid == slot_seqid + 1))
> >>>    		return nfs_ok;
> >>> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> >>> +		return nfs_ok;
> >>>    	if (seqid == slot_seqid)
> >>>    		return nfserr_replay_cache;
> >>>    	return nfserr_seq_misordered;
> >>> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >>>    
> >>>    	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> >>> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> >>> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> >>> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
> >>>    	if (status == nfserr_replay_cache) {
> >>>    		status = nfserr_seq_misordered;
> >>>    		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> >>> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	if (status)
> >>>    		goto out_put_session;
> >>>    
> >>> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> >>> +	    slot->sl_generation == session->se_slot_gen &&
> >>> +	    seq->maxslots <= session->se_target_maxslots)
> >>> +		/* Client acknowledged our reduce maxreqs */
> >>> +		free_session_slots(session, session->se_target_maxslots);
> >>> +
> >>>    	buflen = (seq->cachethis) ?
> >>>    			session->se_fchannel.maxresp_cached :
> >>>    			session->se_fchannel.maxresp_sz;
> >>> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	svc_reserve(rqstp, buflen);
> >>>    
> >>>    	status = nfs_ok;
> >>> -	/* Success! bump slot seqid */
> >>> +	/* Success! accept new slot seqid */
> >>>    	slot->sl_seqid = seq->seqid;
> >>> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
> >>>    	slot->sl_flags |= NFSD4_SLOT_INUSE;
> >>> +	slot->sl_generation = session->se_slot_gen;
> >>>    	if (seq->cachethis)
> >>>    		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> >>>    	else
> >>> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	 * the client might use.
> >>>    	 */
> >>>    	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> >>> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
> >>>    	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> >>>    		int s = session->se_fchannel.maxreqs;
> >>>    		int cnt = DIV_ROUND_UP(s, 5);
> >>> +		void *prev_slot;
> >>>    
> >>>    		do {
> >>>    			/*
> >>> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    			 */
> >>>    			slot = kzalloc(slot_bytes(&session->se_fchannel),
> >>>    				       GFP_NOWAIT);
> >>> +			prev_slot = xa_load(&session->se_slots, s);
> >>> +			if (xa_is_value(prev_slot) && slot) {
> >>> +				slot->sl_seqid = xa_to_value(prev_slot);
> >>> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> >>> +			}
> >>>    			if (slot &&
> >>>    			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> >>>    						GFP_NOWAIT))) {
> >>>    				s += 1;
> >>>    				session->se_fchannel.maxreqs = s;
> >>> +				session->se_target_maxslots = s;
> >>>    			} else {
> >>>    				kfree(slot);
> >>>    				slot = NULL;
> >>>    			}
> >>>    		} while (slot && --cnt > 0);
> >>>    	}
> >>> -	seq->maxslots = session->se_fchannel.maxreqs;
> >>> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> >>> +	seq->target_maxslots = session->se_target_maxslots;
> >>>    
> >>>    out:
> >>>    	switch (clp->cl_cb_state) {
> >>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> >>> index 53fac037611c..4dcb03cd9292 100644
> >>> --- a/fs/nfsd/nfs4xdr.c
> >>> +++ b/fs/nfsd/nfs4xdr.c
> >>> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> >>>    		return nfserr_bad_xdr;
> >>>    	seq->seqid = be32_to_cpup(p++);
> >>>    	seq->slotid = be32_to_cpup(p++);
> >>> -	seq->maxslots = be32_to_cpup(p++);
> >>> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> >>> +	seq->maxslots = be32_to_cpup(p++) + 1;
> >>>    	seq->cachethis = be32_to_cpup(p);
> >>>    
> >>>    	seq->status_flags = 0;
> >>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
> >>>    	if (nfserr != nfs_ok)
> >>>    		return nfserr;
> >>>    	/* sr_target_highest_slotid */
> >>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> >>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
> >>>    	if (nfserr != nfs_ok)
> >>>    		return nfserr;
> >>>    	/* sr_status_flags */
> >>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> >>> index aad547d3ad8b..4251ff3c5ad1 100644
> >>> --- a/fs/nfsd/state.h
> >>> +++ b/fs/nfsd/state.h
> >>> @@ -245,10 +245,12 @@ struct nfsd4_slot {
> >>>    	struct svc_cred sl_cred;
> >>>    	u32	sl_datalen;
> >>>    	u16	sl_opcnt;
> >>> +	u16	sl_generation;
> >>>    #define NFSD4_SLOT_INUSE	(1 << 0)
> >>>    #define NFSD4_SLOT_CACHETHIS	(1 << 1)
> >>>    #define NFSD4_SLOT_INITIALIZED	(1 << 2)
> >>>    #define NFSD4_SLOT_CACHED	(1 << 3)
> >>> +#define NFSD4_SLOT_REUSED	(1 << 4)
> >>>    	u8	sl_flags;
> >>>    	char	sl_data[];
> >>>    };
> >>> @@ -321,7 +323,6 @@ struct nfsd4_session {
> >>>    	u32			se_cb_slot_avail; /* bitmap of available slots */
> >>>    	u32			se_cb_highest_slot;	/* highest slot client wants */
> >>>    	u32			se_cb_prog;
> >>> -	bool			se_dead;
> >>>    	struct list_head	se_hash;	/* hash by sessionid */
> >>>    	struct list_head	se_perclnt;
> >>>    	struct nfs4_client	*se_client;
> >>> @@ -331,6 +332,9 @@ struct nfsd4_session {
> >>>    	struct list_head	se_conns;
> >>>    	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
> >>>    	struct xarray		se_slots;	/* forward channel slots */
> >>> +	u16			se_slot_gen;
> >>> +	bool			se_dead;
> >>> +	u32			se_target_maxslots;
> >>>    };
> >>>    
> >>>    /* formatted contents of nfs4_sessionid */
> >>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> >>> index 382cc1389396..c26ba86dbdfd 100644
> >>> --- a/fs/nfsd/xdr4.h
> >>> +++ b/fs/nfsd/xdr4.h
> >>> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >>>    	u32			slotid;			/* request/response */
> >>>    	u32			maxslots;		/* request/response */
> >>>    	u32			cachethis;		/* request */
> >>> -#if 0
> >>>    	u32			target_maxslots;	/* response */
> >>> -#endif /* not yet */
> >>>    	u32			status_flags;		/* response */
> >>>    };
> >>>    
> >>
> >> Hi Neil -
> >>
> >> I've found some misbehavior which I've bisected to this commit.
> > 
> > Hi Chuck,
> >   could you please confirm that it really was this commit that you
> >   bisected to?  Not the next one?
> 
> It's this one. I included the hunk that introduces the misbehavior
> below. It's when the server starts returning a different value for
> target_highest_slotid in the SEQUENCE result. The target_highest
> is 63 -- the number the server has in its slot table. The maxslots
> value is smaller.
> 
> In the working case, these two values never differ.
> 
> 
> >   Because this commit never reduces ->se_target_maxslots, so the
> >   patch which you say removed the symptom should be a no-op.
> 
> It's not the reducing of target_highest that's the problem. Rather
> it's that the target_highest and max in-use slot IDs are different
> values for a brief period after a reconnect.
> 
> That triggers the client to think that the server has reduced its
> slot table size, so the client shrinks its slot table. The server has
> not actually shrunken it, however, so it continues to expect the client
> to use the large slot sequence numbers for those slots.
> 
> When the client starts to use one of those slots again, it uses a
> sequence number of 1, and that fails.
> 
> 
> >   Even if it was the next commit I'm struggling to pin down the
> >   problem.  Here is my current analysis - partly to ensure I can present
> >   it clearly.
> > 
> >   The evidence suggests that the client has retired a slot that the
> >   server hasn't.  This happens when nfs41_set_server_slotid_locked()
> >   calls nfsd4_shrink_slot_table(), and nothing will happen if any slots
> >   before the new limit are still in use.  If the server reduces
> >   its idea of the target when the client isn't even using that many,
> >   the slots can be freed immediately that the client gets a reply
> >   indicating the new highest_slot number from the server.
> > 
> >   The server will not free these slots immediately but will wait to get a
> >   confirmation from the client that it has accepted the new limit.  But,
> >   importantly, the server will not increase the limit that it sends to
> >   the client until after it has has a chance to free the retired slots.
> >   If the server doesn't increase the limit, then the client won't try to
> >   use the retired slots...
> > 
> >   Do you still have the network trace which chows the error?  Would I be
> >   able to look at it?
> 
> Sending via WeTransfer.

Thanks.  I see the problem clearly now.  It isn't so much that 'high
slot' and 'target' are different, it is that they are both wrong.
'highslot' (which is the max the server will accept) is typically 0 or
4, and 'target' is 63.

The 'highest slot' has been copied from what the client said was the max
currently in use.  I don't know where the 'target' came from, maybe from
the previous reply sent.

This bug was introduced in 
   nfsd: allocate new session-based DRC slots on demand.
I should have move the seq->maxslots assignment after the "out:", not
before.

I'll send a patch.


> 
> But also, it's easy enough to reproduce.
> 
> Build your server with CONFIG_FAIL_SUNRPC set. Reboot into the new
> kernel.
> 
> Before each test, run this script on the server:
> 
> #!/usr/bin/bash
> 
> cd /sys/kernel/debug/fail_sunrpc/
> 
> echo Y > ignore-cache-wait
> echo Y > ignore-client-disconnect
> echo 24847 > interval
> echo 97 > times
> echo 100 > probability
> 
> exit 0
> 
> On the client, run fstests with an NFSv4.1 mount. It will usually hang
> within the first 15 tests.
> 

I might give that a try - thanks.

NeilBrown

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2025-01-27 22:57 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-06  0:43 [PATCH 0/6 v3] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-12-06  0:43 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
2024-12-06  0:43 ` [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC NeilBrown
2024-12-06  0:43 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
2024-12-06  0:43 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
2024-12-06  1:04   ` Jeff Layton
2024-12-06  1:43     ` NeilBrown
2024-12-06 13:49       ` Jeff Layton
2024-12-06 20:51   ` Chuck Lever
2024-12-08  4:52     ` NeilBrown
2024-12-06  0:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2024-12-06  5:30   ` Jeff Layton
2024-12-06  6:05     ` NeilBrown
2024-12-06 13:59       ` Jeff Layton
2024-12-06  0:43 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
  -- strict thread matches above, loose matches on Subject: below --
2024-12-11 21:47 [PATCH 0/6 v5] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-12-11 21:47 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2025-01-19  2:01   ` Chuck Lever
2025-01-21  2:36     ` NeilBrown
2025-01-21 16:24       ` Chuck Lever
2025-01-27  4:08     ` NeilBrown
2025-01-27 13:57       ` Chuck Lever
2025-01-27 22:57         ` NeilBrown
2024-12-08 22:43 [PATCH 0/6 v4] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-12-08 22:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2024-11-19 19:25   ` Chuck Lever
2024-11-19 22:35     ` NeilBrown
2024-11-20  1:27       ` Chuck Lever
2024-11-21 21:47         ` NeilBrown
2024-11-21 22:29           ` Chuck Lever III
2024-12-02 16:11             ` Chuck Lever III
2024-12-03  4:28               ` NeilBrown
2024-12-03 14:40                 ` Chuck Lever III
2024-11-19 19:48   ` Jeff Layton

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox