[PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand

public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand
@ 2024-11-19  0:41 NeilBrown
  2024-11-19  0:41 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
                   ` (6 more replies)
  0 siblings, 7 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Here is v2 of my series for on-demand allocation and freeing of session DRC slots.

- Now uses an xarray to store slots, and the limit is raised to 2048
- delays retiring a slot until the client has confirmed that it isn't
  using it, as described in the RFC:

      The replier SHOULD retain the slots it wants to retire until the
      requester sends a request with a highest_slotid less than or equal
      to the replier's new enforced highest_slotid.

- When a retired slot is used, allow the seqid to be the next in sequence
  as required by the RFC:

         Each time a slot is reused, the request MUST specify a sequence
         ID that is one greater than that of the previous request on the
         slot.

  or "1" as (arguably) allowed by the RFC:

         The first time a slot is used, the requester MUST specify a
         sequence ID of one

- current slot allocation is now reported in /proc/fs/nfsd/clients/*/info

This has been tested with highly aggressive shrinker settings:
	nfsd_slot_shrinker->seeks = 0;
	nfsd_slot_shrinker->batch = 2;

and with periodic "echo 3 > drop_caches".  The slot count drops as
expected and then increases again.

NeilBrown



^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19  0:41 ` [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC NeilBrown
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Using an xarray to store session slots will make it easier to change the
number of active slots based on demand, and removes an unnecessary
limit.

To achieve good throughput with a high-latency server it can be helpful
to have hundreds of concurrent writes, which means hundreds of slots.
So increase the limit to 2048 (twice what the Linux client will
currently use).

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 28 ++++++++++++++++++----------
 fs/nfsd/state.h     |  8 +++++---
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index e0daf8b3982c..b48c1423d89b 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1915,8 +1915,11 @@ free_session_slots(struct nfsd4_session *ses)
 	int i;
 
 	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
-		free_svc_cred(&ses->se_slots[i]->sl_cred);
-		kfree(ses->se_slots[i]);
+		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+
+		xa_erase(&ses->se_slots, i);
+		free_svc_cred(&slot->sl_cred);
+		kfree(slot);
 	}
 }
 
@@ -1996,17 +1999,20 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	struct nfsd4_session *new;
 	int i;
 
-	BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
-		     > PAGE_SIZE);
-
-	new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
 	if (!new)
 		return NULL;
+	xa_init(&new->se_slots);
 	/* allocate each struct nfsd4_slot and data cache in one piece */
 	for (i = 0; i < numslots; i++) {
-		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
-		if (!new->se_slots[i])
+		struct nfsd4_slot *slot;
+		slot = kzalloc(slotsize, GFP_KERNEL);
+		if (!slot)
 			goto out_free;
+		if (xa_is_err(xa_store(&new->se_slots, i, slot, GFP_KERNEL))) {
+			kfree(slot);
+			goto out_free;
+		}
 	}
 
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
@@ -2017,7 +2023,8 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	return new;
 out_free:
 	while (i--)
-		kfree(new->se_slots[i]);
+		kfree(xa_load(&new->se_slots, i));
+	xa_destroy(&new->se_slots);
 	kfree(new);
 	return NULL;
 }
@@ -2124,6 +2131,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 static void __free_session(struct nfsd4_session *ses)
 {
 	free_session_slots(ses);
+	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
 
@@ -4292,7 +4300,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (seq->slotid >= session->se_fchannel.maxreqs)
 		goto out_put_session;
 
-	slot = session->se_slots[seq->slotid];
+	slot = xa_load(&session->se_slots, seq->slotid);
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	/* We do not negotiate the number of slots yet, so set the
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 554041da8593..e97626916a68 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -245,8 +245,10 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
 	return container_of(s, struct nfs4_delegation, dl_stid);
 }
 
-/* Maximum number of slots per session. 160 is useful for long haul TCP */
-#define NFSD_MAX_SLOTS_PER_SESSION     160
+/* Maximum number of slots per session.  A large number can be needed to
+ * get good throughput on high-latency servers.
+ */
+#define NFSD_MAX_SLOTS_PER_SESSION	2048
 /* Maximum  session per slot cache size */
 #define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
@@ -345,7 +347,7 @@ struct nfsd4_session {
 	struct nfsd4_cb_sec	se_cb_sec;
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
-	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
+	struct xarray		se_slots;	/* forward channel slots */
 };
 
 /* formatted contents of nfs4_sessionid */
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
  2024-11-19  0:41 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19  0:41 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Rather than guessing how much space it might be safe to use for the DRC,
simply try allocating slots and be prepared to accept failure.

The first slot for each session is allocated with GFP_KERNEL which is
unlikely to fail.  Subsequent slots are allocated with the addition of
__GFP_NORETRY which is expected to fail if there isn't much free memory.

This is probably too aggressive but clears the way for adding a
shrinker interface to free extra slots when memory is tight.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 91 +++++++--------------------------------------
 fs/nfsd/nfsd.h      |  3 --
 fs/nfsd/nfssvc.c    | 32 ----------------
 3 files changed, 14 insertions(+), 112 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index b48c1423d89b..3889ba1c653f 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1938,65 +1938,13 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
 	return size + sizeof(struct nfsd4_slot);
 }
 
-/*
- * XXX: If we run out of reserved DRC memory we could (up to a point)
- * re-negotiate active sessions and reduce their slot usage to make
- * room for new connections. For now we just fail the create session.
- */
-static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
-{
-	u32 slotsize = slot_bytes(ca);
-	u32 num = ca->maxreqs;
-	unsigned long avail, total_avail;
-	unsigned int scale_factor;
-
-	spin_lock(&nfsd_drc_lock);
-	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
-		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
-	else
-		/* We have handed out more space than we chose in
-		 * set_max_drc() to allow.  That isn't really a
-		 * problem as long as that doesn't make us think we
-		 * have lots more due to integer overflow.
-		 */
-		total_avail = 0;
-	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
-	/*
-	 * Never use more than a fraction of the remaining memory,
-	 * unless it's the only way to give this client a slot.
-	 * The chosen fraction is either 1/8 or 1/number of threads,
-	 * whichever is smaller.  This ensures there are adequate
-	 * slots to support multiple clients per thread.
-	 * Give the client one slot even if that would require
-	 * over-allocation--it is better than failure.
-	 */
-	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
-
-	avail = clamp_t(unsigned long, avail, slotsize,
-			total_avail/scale_factor);
-	num = min_t(int, num, avail / slotsize);
-	num = max_t(int, num, 1);
-	nfsd_drc_mem_used += num * slotsize;
-	spin_unlock(&nfsd_drc_lock);
-
-	return num;
-}
-
-static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
-{
-	int slotsize = slot_bytes(ca);
-
-	spin_lock(&nfsd_drc_lock);
-	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
-	spin_unlock(&nfsd_drc_lock);
-}
-
 static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 					   struct nfsd4_channel_attrs *battrs)
 {
 	int numslots = fattrs->maxreqs;
 	int slotsize = slot_bytes(fattrs);
 	struct nfsd4_session *new;
+	struct nfsd4_slot *slot;
 	int i;
 
 	new = kzalloc(sizeof(*new), GFP_KERNEL);
@@ -2004,17 +1952,20 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 		return NULL;
 	xa_init(&new->se_slots);
 	/* allocate each struct nfsd4_slot and data cache in one piece */
-	for (i = 0; i < numslots; i++) {
-		struct nfsd4_slot *slot;
-		slot = kzalloc(slotsize, GFP_KERNEL);
+	slot = kzalloc(slotsize, GFP_KERNEL);
+	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
+		goto out_free;
+
+	for (i = 1; i < numslots; i++) {
+		slot = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
 		if (!slot)
-			goto out_free;
+			break;
 		if (xa_is_err(xa_store(&new->se_slots, i, slot, GFP_KERNEL))) {
 			kfree(slot);
-			goto out_free;
+			break;
 		}
 	}
-
+	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
@@ -2022,8 +1973,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	spin_lock_init(&new->se_lock);
 	return new;
 out_free:
-	while (i--)
-		kfree(xa_load(&new->se_slots, i));
+	kfree(slot);
 	xa_destroy(&new->se_slots);
 	kfree(new);
 	return NULL;
@@ -2138,7 +2088,6 @@ static void __free_session(struct nfsd4_session *ses)
 static void free_session(struct nfsd4_session *ses)
 {
 	nfsd4_del_conns(ses);
-	nfsd4_put_drc_mem(&ses->se_fchannel);
 	__free_session(ses);
 }
 
@@ -3800,17 +3749,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
 	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
 			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
 	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
-	/*
-	 * Note decreasing slot size below client's request may make it
-	 * difficult for client to function correctly, whereas
-	 * decreasing the number of slots will (just?) affect
-	 * performance.  When short on memory we therefore prefer to
-	 * decrease number of slots instead of their size.  Clients that
-	 * request larger slots than they need will get poor results:
-	 * Note that we always allow at least one slot, because our
-	 * accounting is soft and provides no guarantees either way.
-	 */
-	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
 
 	return nfs_ok;
 }
@@ -3888,11 +3826,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 		return status;
 	status = check_backchannel_attrs(&cr_ses->back_channel);
 	if (status)
-		goto out_release_drc_mem;
+		goto out_err;
 	status = nfserr_jukebox;
 	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
 	if (!new)
-		goto out_release_drc_mem;
+		goto out_err;
 	conn = alloc_conn_from_crses(rqstp, cr_ses);
 	if (!conn)
 		goto out_free_session;
@@ -4001,8 +3939,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 	free_conn(conn);
 out_free_session:
 	__free_session(new);
-out_release_drc_mem:
-	nfsd4_put_drc_mem(&cr_ses->fore_channel);
+out_err:
 	return status;
 }
 
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index f007699aa397..e2997f0ffbc5 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -88,9 +88,6 @@ struct nfsd_genl_rqstp {
 extern struct svc_program	nfsd_programs[];
 extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
 extern struct mutex		nfsd_mutex;
-extern spinlock_t		nfsd_drc_lock;
-extern unsigned long		nfsd_drc_max_mem;
-extern unsigned long		nfsd_drc_mem_used;
 extern atomic_t			nfsd_th_cnt;		/* number of available threads */
 
 extern const struct seq_operations nfs_exports_op;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 49e2f32102ab..3dbaefc96608 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -70,16 +70,6 @@ static __be32			nfsd_init_request(struct svc_rqst *,
  */
 DEFINE_MUTEX(nfsd_mutex);
 
-/*
- * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
- * nfsd_drc_max_pages limits the total amount of memory available for
- * version 4.1 DRC caches.
- * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
- */
-DEFINE_SPINLOCK(nfsd_drc_lock);
-unsigned long	nfsd_drc_max_mem;
-unsigned long	nfsd_drc_mem_used;
-
 #if IS_ENABLED(CONFIG_NFS_LOCALIO)
 static const struct svc_version *localio_versions[] = {
 	[1] = &localio_version1,
@@ -575,27 +565,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
 		}
 }
 
-/*
- * Each session guarantees a negotiated per slot memory cache for replies
- * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
- * NFSv4.1 server might want to use more memory for a DRC than a machine
- * with mutiple services.
- *
- * Impose a hard limit on the number of pages for the DRC which varies
- * according to the machines free pages. This is of course only a default.
- *
- * For now this is a #defined shift which could be under admin control
- * in the future.
- */
-static void set_max_drc(void)
-{
-	#define NFSD_DRC_SIZE_SHIFT	7
-	nfsd_drc_max_mem = (nr_free_buffer_pages()
-					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
-	nfsd_drc_mem_used = 0;
-	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
-}
-
 static int nfsd_get_default_max_blksize(void)
 {
 	struct sysinfo i;
@@ -678,7 +647,6 @@ int nfsd_create_serv(struct net *net)
 	nn->nfsd_serv = serv;
 	spin_unlock(&nfsd_notifier_lock);
 
-	set_max_drc();
 	/* check if the notifier is already set */
 	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
 		register_inetaddr_notifier(&nfsd_inetaddr_notifier);
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
  2024-11-19  0:41 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
  2024-11-19  0:41 ` [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19 19:14   ` Chuck Lever
  2024-11-19 19:21   ` Chuck Lever
  2024-11-19  0:41 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
                   ` (3 subsequent siblings)
  6 siblings, 2 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Each client now reports the number of slots allocated in each session.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 3889ba1c653f..31ff9f92a895 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
 static int client_info_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = file_inode(m->file);
+	struct nfsd4_session *ses;
 	struct nfs4_client *clp;
 	u64 clid;
 
@@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
 	seq_printf(m, "admin-revoked states: %d\n",
 		   atomic_read(&clp->cl_admin_revoked));
+	seq_printf(m, "session slots:");
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	spin_unlock(&clp->cl_lock);
+	seq_puts(m, "\n");
+
 	drop_client(clp);
 
 	return 0;
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (2 preceding siblings ...)
  2024-11-19  0:41 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19 19:20   ` Chuck Lever
  2024-11-19 19:34   ` Jeff Layton
  2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
                   ` (2 subsequent siblings)
  6 siblings, 2 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

If a client ever uses the highest available slot for a given session,
attempt to allocate another slot so there is room for the client to use
more slots if wanted.  GFP_NOWAIT is used so if there is not plenty of
free memory, failure is expected - which is what we want.  It also
allows the allocation while holding a spinlock.

We would expect to stabilise with one more slot available than the client
actually uses.

Now that we grow the slot table on demand we can start with a smaller
allocation.  Define NFSD_MAX_INITIAL_SLOTS and allocate at most that
many when a session is created.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 32 ++++++++++++++++++++++++++------
 fs/nfsd/state.h     |  2 ++
 2 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 31ff9f92a895..fb522165b376 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1956,7 +1956,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
 		goto out_free;
 
-	for (i = 1; i < numslots; i++) {
+	for (i = 1; i < numslots && i < NFSD_MAX_INITIAL_SLOTS; i++) {
 		slot = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
 		if (!slot)
 			break;
@@ -4248,11 +4248,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	slot = xa_load(&session->se_slots, seq->slotid);
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
-	/* We do not negotiate the number of slots yet, so set the
-	 * maxslots to the session maxreqs which is used to encode
-	 * sr_highest_slotid and the sr_target_slot id to maxslots */
-	seq->maxslots = session->se_fchannel.maxreqs;
-
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
 	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
 					slot->sl_flags & NFSD4_SLOT_INUSE);
@@ -4302,6 +4297,31 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	cstate->session = session;
 	cstate->clp = clp;
 
+	/*
+	 * If the client ever uses the highest available slot,
+	 * gently try to allocate another one.
+	 */
+	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
+		int s = session->se_fchannel.maxreqs;
+
+		/*
+		 * GFP_NOWAIT is a low-priority non-blocking allocation
+		 * which can be used under client_lock and only succeeds
+		 * if there is plenty of memory.
+		 * Use GFP_ATOMIC which is higher priority for xa_store()
+		 * so we are less likely to waste the effort of the first
+		 * allocation.
+		 */
+		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
+		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
+						GFP_ATOMIC)))
+			session->se_fchannel.maxreqs += 1;
+		else
+			kfree(slot);
+	}
+	seq->maxslots = session->se_fchannel.maxreqs;
+
 out:
 	switch (clp->cl_cb_state) {
 	case NFSD4_CB_DOWN:
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index e97626916a68..a14a823670e9 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -249,6 +249,8 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
  * get good throughput on high-latency servers.
  */
 #define NFSD_MAX_SLOTS_PER_SESSION	2048
+/* Maximum number of slots per session to allocate for CREATE_SESSION */
+#define NFSD_MAX_INITIAL_SLOTS		32
 /* Maximum  session per slot cache size */
 #define NFSD_SLOT_CACHE_SIZE		2048
 /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (3 preceding siblings ...)
  2024-11-19  0:41 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19 19:25   ` Chuck Lever
  2024-11-19 19:48   ` Jeff Layton
  2024-11-19  0:41 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
  2024-11-19 21:31 ` [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand Jeff Layton
  6 siblings, 2 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than the limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 8 bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the sequence ID could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactivate a slot we accept either the next seqid
in sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
 fs/nfsd/nfs4xdr.c   |  5 +--
 fs/nfsd/state.h     |  4 +++
 fs/nfsd/xdr4.h      |  2 --
 4 files changed, 76 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fb522165b376..0625b0aec6b8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
+			 GFP_ATOMIC);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
@@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * gently try to allocate another one.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
+		void *prev_slot;
 
 		/*
 		 * GFP_NOWAIT is a low-priority non-blocking allocation
@@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		 * allocation.
 		 */
 		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
+		prev_slot = xa_load(&session->se_slots, s);
+		if (xa_is_value(prev_slot) && slot) {
+			slot->sl_seqid = xa_to_value(prev_slot);
+			slot->sl_flags |= NFSD4_SLOT_REUSED;
+		}
 		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
-						GFP_ATOMIC)))
+						GFP_ATOMIC))) {
 			session->se_fchannel.maxreqs += 1;
-		else
+			session->se_target_maxslots = session->se_fchannel.maxreqs;
+		} else {
 			kfree(slot);
+		}
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5c79494bd20b..b281a2198ff3 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index a14a823670e9..ea6659d52be2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -268,7 +268,9 @@ struct nfsd4_slot {
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
+	u8	sl_generation;
 	char	sl_data[];
 };
 
@@ -350,6 +352,8 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u8			se_slot_gen;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (4 preceding siblings ...)
  2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2024-11-19  0:41 ` NeilBrown
  2024-11-19 19:28   ` Chuck Lever
  2024-11-19 21:17   ` Jeff Layton
  2024-11-19 21:31 ` [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand Jeff Layton
  6 siblings, 2 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19  0:41 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Add a shrinker which frees unused slots and may ask the clients to use
fewer slots on each session.

Each session now tracks se_client_maxreqs which is the most recent
max-requests-in-use reported by the client, and se_target_maxreqs which
is a target number of requests which is reduced by the shrinker.

The shrinker iterates over all sessions on all clients in all
net-namespaces and reduces the target by 1 for each.  The shrinker may
get called multiple times to reduce by more than 1 each.

If se_target_maxreqs is above se_client_maxreqs, those slots can be
freed immediately.  If not, the client will be asked to reduce its usage
and as the usage goes down slots will be freed.

Once the usage has dropped to match the target, the target can be
increased if the client uses all available slots and if a GFP_NOWAIT
allocation succeeds.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 72 ++++++++++++++++++++++++++++++++++++++++++---
 fs/nfsd/state.h     |  1 +
 2 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0625b0aec6b8..ac49c3bd0dcb 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
  */
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
+static struct shrinker *nfsd_slot_shrinker;
+static DEFINE_SPINLOCK(nfsd_session_list_lock);
+static LIST_HEAD(nfsd_session_list);
+/* The sum of "target_slots-1" on every session.  The shrinker can push this
+ * down, though it can take a little while for the memory to actually
+ * be freed.  The "-1" is because we can never free slot 0 while the
+ * session is active.
+ */
+static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
+
 static void
 free_session_slots(struct nfsd4_session *ses, int from)
 {
@@ -1931,11 +1941,14 @@ free_session_slots(struct nfsd4_session *ses, int from)
 		kfree(slot);
 	}
 	ses->se_fchannel.maxreqs = from;
-	if (ses->se_target_maxslots > from)
-		ses->se_target_maxslots = from;
+	if (ses->se_target_maxslots > from) {
+		int new_target = from ?: 1;
+		atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
+		ses->se_target_maxslots = new_target;
+	}
 }
 
-static int __maybe_unused
+static int
 reduce_session_slots(struct nfsd4_session *ses, int dec)
 {
 	struct nfsd_net *nn = net_generic(ses->se_client->net,
@@ -1948,6 +1961,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec)
 		return ret;
 	ret = min(dec, ses->se_target_maxslots-1);
 	ses->se_target_maxslots -= ret;
+	atomic_sub(ret, &nfsd_total_target_slots);
 	ses->se_slot_gen += 1;
 	if (ses->se_slot_gen == 0) {
 		int i;
@@ -2006,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
 	new->se_target_maxslots = i;
+	atomic_add(i - 1, &nfsd_total_target_slots);
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2130,6 +2145,36 @@ static void free_session(struct nfsd4_session *ses)
 	__free_session(ses);
 }
 
+static unsigned long
+nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
+{
+	unsigned long cnt = atomic_read(&nfsd_total_target_slots);
+
+	return cnt ? cnt : SHRINK_EMPTY;
+}
+
+static unsigned long
+nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
+{
+	struct nfsd4_session *ses;
+	unsigned long scanned = 0;
+	unsigned long freed = 0;
+
+	spin_lock(&nfsd_session_list_lock);
+	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
+		freed += reduce_session_slots(ses, 1);
+		scanned += 1;
+		if (scanned >= sc->nr_to_scan) {
+			/* Move starting point for next scan */
+			list_move(&nfsd_session_list, &ses->se_all_sessions);
+			break;
+		}
+	}
+	spin_unlock(&nfsd_session_list_lock);
+	sc->nr_scanned = scanned;
+	return freed;
+}
+
 static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
 	int idx;
@@ -2154,6 +2199,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
 	list_add(&new->se_perclnt, &clp->cl_sessions);
 	spin_unlock(&clp->cl_lock);
 
+	spin_lock(&nfsd_session_list_lock);
+	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
+	spin_unlock(&nfsd_session_list_lock);
+
 	{
 		struct sockaddr *sa = svc_addr(rqstp);
 		/*
@@ -2223,6 +2272,9 @@ unhash_session(struct nfsd4_session *ses)
 	spin_lock(&ses->se_client->cl_lock);
 	list_del(&ses->se_perclnt);
 	spin_unlock(&ses->se_client->cl_lock);
+	spin_lock(&nfsd_session_list_lock);
+	list_del(&ses->se_all_sessions);
+	spin_unlock(&nfsd_session_list_lock);
 }
 
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
@@ -4335,6 +4387,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	slot->sl_seqid = seq->seqid;
 	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4371,6 +4424,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_ATOMIC))) {
 			session->se_fchannel.maxreqs += 1;
+			atomic_add(session->se_fchannel.maxreqs - session->se_target_maxslots,
+				   &nfsd_total_target_slots);
 			session->se_target_maxslots = session->se_fchannel.maxreqs;
 		} else {
 			kfree(slot);
@@ -8779,7 +8834,6 @@ nfs4_state_start_net(struct net *net)
 }
 
 /* initialization to perform when the nfsd service is started: */
-
 int
 nfs4_state_start(void)
 {
@@ -8789,6 +8843,15 @@ nfs4_state_start(void)
 	if (ret)
 		return ret;
 
+	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
+	if (!nfsd_slot_shrinker) {
+		rhltable_destroy(&nfs4_file_rhltable);
+		return -ENOMEM;
+	}
+	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
+	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
+	shrinker_register(nfsd_slot_shrinker);
+
 	set_max_delegations();
 	return 0;
 }
@@ -8830,6 +8893,7 @@ void
 nfs4_state_shutdown(void)
 {
 	rhltable_destroy(&nfs4_file_rhltable);
+	shrinker_free(nfsd_slot_shrinker);
 }
 
 static void
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index ea6659d52be2..0e320ba097f2 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -345,6 +345,7 @@ struct nfsd4_session {
 	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
+	struct list_head	se_all_sessions;/* global list of sessions */
 	struct nfs4_client	*se_client;
 	struct nfs4_sessionid	se_sessionid;
 	struct nfsd4_channel_attrs se_fchannel;
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19  0:41 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
@ 2024-11-19 19:14   ` Chuck Lever
  2024-11-19 22:22     ` NeilBrown
  2024-11-19 19:21   ` Chuck Lever
  1 sibling, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-19 19:14 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> Each client now reports the number of slots allocated in each session.

Can this file also report the target slot count? I.e., is the server
matching the client's requested slot count, or is it over or under
by some number?

Would it be useful for a server tester or administrator to poke a
target slot count value into this file and watch the machinery
adjust?


> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 3889ba1c653f..31ff9f92a895 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
>  static int client_info_show(struct seq_file *m, void *v)
>  {
>  	struct inode *inode = file_inode(m->file);
> +	struct nfsd4_session *ses;
>  	struct nfs4_client *clp;
>  	u64 clid;
>  
> @@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
>  	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
>  	seq_printf(m, "admin-revoked states: %d\n",
>  		   atomic_read(&clp->cl_admin_revoked));
> +	seq_printf(m, "session slots:");
> +	spin_lock(&clp->cl_lock);
> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> +		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> +	spin_unlock(&clp->cl_lock);
> +	seq_puts(m, "\n");
> +
>  	drop_client(clp);
>  
>  	return 0;
> -- 
> 2.47.0
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-11-19  0:41 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
@ 2024-11-19 19:20   ` Chuck Lever
  2024-11-19 22:27     ` NeilBrown
  2024-11-19 19:34   ` Jeff Layton
  1 sibling, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-19 19:20 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, Nov 19, 2024 at 11:41:31AM +1100, NeilBrown wrote:
> If a client ever uses the highest available slot for a given session,
> attempt to allocate another slot so there is room for the client to use
> more slots if wanted.  GFP_NOWAIT is used so if there is not plenty of
> free memory, failure is expected - which is what we want.  It also
> allows the allocation while holding a spinlock.
> 
> We would expect to stabilise with one more slot available than the client
> actually uses.

Which begs the question "why have a 2048 slot maximum session slot
table size?" 1025 might work too. But is there a need for any
maximum at all, or is this just a sanity check?


> Now that we grow the slot table on demand we can start with a smaller
> allocation.  Define NFSD_MAX_INITIAL_SLOTS and allocate at most that
> many when session is created.

Maybe NFSD_DEFAULT_INITIAL_SLOTS is more descriptive?


> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 32 ++++++++++++++++++++++++++------
>  fs/nfsd/state.h     |  2 ++
>  2 files changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 31ff9f92a895..fb522165b376 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1956,7 +1956,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
>  		goto out_free;
>  
> -	for (i = 1; i < numslots; i++) {
> +	for (i = 1; i < numslots && i < NFSD_MAX_INITIAL_SLOTS; i++) {
>  		slot = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
>  		if (!slot)
>  			break;
> @@ -4248,11 +4248,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	slot = xa_load(&session->se_slots, seq->slotid);
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
> -	/* We do not negotiate the number of slots yet, so set the
> -	 * maxslots to the session maxreqs which is used to encode
> -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> -	seq->maxslots = session->se_fchannel.maxreqs;
> -
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>  	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>  					slot->sl_flags & NFSD4_SLOT_INUSE);
> @@ -4302,6 +4297,31 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	cstate->session = session;
>  	cstate->clp = clp;
>  
> +	/*
> +	 * If the client ever uses the highest available slot,
> +	 * gently try to allocate another one.
> +	 */
> +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> +		int s = session->se_fchannel.maxreqs;
> +
> +		/*
> +		 * GFP_NOWAIT is a low-priority non-blocking allocation
> +		 * which can be used under client_lock and only succeeds
> +		 * if there is plenty of memory.
> +		 * Use GFP_ATOMIC which is higher priority for xa_store()
> +		 * so we are less likely to waste the effort of the first
> +		 * allocation.

IIUC, GFP_ATOMIC allocations come from a special pool. I don't think
we want that here. I'd rather stick with NORETRY or KERNEL.


> +		 */
> +		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> +						GFP_ATOMIC)))
> +			session->se_fchannel.maxreqs += 1;
> +		else
> +			kfree(slot);
> +	}
> +	seq->maxslots = session->se_fchannel.maxreqs;
> +
>  out:
>  	switch (clp->cl_cb_state) {
>  	case NFSD4_CB_DOWN:
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e97626916a68..a14a823670e9 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -249,6 +249,8 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
>   * get good throughput on high-latency servers.
>   */
>  #define NFSD_MAX_SLOTS_PER_SESSION	2048
> +/* Maximum number of slots per session to allocate for CREATE_SESSION */
> +#define NFSD_MAX_INITIAL_SLOTS		32

The first couple of patches did so nicely at ruthlessly discarding a
lot of arbitrary logic. I'm not convinced by the patch description
that the INITIAL_SLOTS complexity is needed...


>  /* Maximum  session per slot cache size */
>  #define NFSD_SLOT_CACHE_SIZE		2048
>  /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
> -- 
> 2.47.0
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19  0:41 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
  2024-11-19 19:14   ` Chuck Lever
@ 2024-11-19 19:21   ` Chuck Lever
  2024-11-19 22:24     ` NeilBrown
  1 sibling, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-19 19:21 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> Each client now reports the number of slots allocated in each session.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 8 ++++++++
>  1 file changed, 8 insertions(+)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 3889ba1c653f..31ff9f92a895 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
>  static int client_info_show(struct seq_file *m, void *v)
>  {
>  	struct inode *inode = file_inode(m->file);
> +	struct nfsd4_session *ses;
>  	struct nfs4_client *clp;
>  	u64 clid;
>  
> @@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
>  	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
>  	seq_printf(m, "admin-revoked states: %d\n",
>  		   atomic_read(&clp->cl_admin_revoked));
> +	seq_printf(m, "session slots:");
> +	spin_lock(&clp->cl_lock);
> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> +		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> +	spin_unlock(&clp->cl_lock);
> +	seq_puts(m, "\n");
> +

Also, I wonder if information about the backchannel session can be
surfaced in this way?


>  	drop_client(clp);
>  
>  	return 0;
> -- 
> 2.47.0
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2024-11-19 19:25   ` Chuck Lever
  2024-11-19 22:35     ` NeilBrown
  2024-11-19 19:48   ` Jeff Layton
  1 sibling, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-19 19:25 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than the limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the sequence ID could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactivate a slot we accept the next seqid in
> sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fb522165b376..0625b0aec6b8 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> +			 GFP_ATOMIC);

Again... ATOMIC is probably not what we want here, even if it is
only documentary.

And, I thought we determined that an unretired slot had a sequence
number that is reset. Why save the slot's seqid? If I'm missing
something, the comment here should be bolstered to explain it.


>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> @@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * gently try to allocate another one.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
> +		void *prev_slot;
>  
>  		/*
>  		 * GFP_NOWAIT is a low-priority non-blocking allocation
> @@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		 * allocation.
>  		 */
>  		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		prev_slot = xa_load(&session->se_slots, s);
> +		if (xa_is_value(prev_slot) && slot) {
> +			slot->sl_seqid = xa_to_value(prev_slot);
> +			slot->sl_flags |= NFSD4_SLOT_REUSED;
> +		}
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> -						GFP_ATOMIC)))
> +						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> -		else
> +			session->se_target_maxslots = session->se_fchannel.maxreqs;
> +		} else {
>  			kfree(slot);
> +		}
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 5c79494bd20b..b281a2198ff3 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index a14a823670e9..ea6659d52be2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -268,7 +268,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -350,6 +352,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  
> -- 
> 2.47.0
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session
  2024-11-19  0:41 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
@ 2024-11-19 19:28   ` Chuck Lever
  2024-11-19 22:41     ` NeilBrown
  2024-11-19 21:17   ` Jeff Layton
  1 sibling, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-19 19:28 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, Nov 19, 2024 at 11:41:33AM +1100, NeilBrown wrote:
> Add a shrinker which frees unused slots and may ask the clients to use
> fewer slots on each session.
> 
> Each session now tracks se_client_maxreqs which is the most recent
> max-requests-in-use reported by the client, and se_target_maxreqs which
> is a target number of requests which is reduced by the shrinker.
> 
> The shrinker iterates over all sessions on all clients in all
> net-namespaces and reduces the target by 1 for each.  The shrinker may
> get called multiple times to reduce by more than 1 each.
> 
> If se_target_maxreqs is above se_client_maxreqs, those slots can be
> freed immediately.  If not, the client will be asked to reduce its usage
> and as the usage goes down slots will be freed.
> 
> Once the usage has dropped to match the target, the target can be
> increased if the client uses all available slots and if a GFP_NOWAIT
> allocation succeeds.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 72 ++++++++++++++++++++++++++++++++++++++++++---
>  fs/nfsd/state.h     |  1 +
>  2 files changed, 69 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0625b0aec6b8..ac49c3bd0dcb 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
>   */
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
> +static struct shrinker *nfsd_slot_shrinker;
> +static DEFINE_SPINLOCK(nfsd_session_list_lock);
> +static LIST_HEAD(nfsd_session_list);
> +/* The sum of "target_slots-1" on every session.  The shrinker can push this
> + * down, though it can take a little while for the memory to actually
> + * be freed.  The "-1" is because we can never free slot 0 while the
> + * session is active.
> + */
> +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
> +
>  static void
>  free_session_slots(struct nfsd4_session *ses, int from)
>  {
> @@ -1931,11 +1941,14 @@ free_session_slots(struct nfsd4_session *ses, int from)
>  		kfree(slot);
>  	}
>  	ses->se_fchannel.maxreqs = from;
> -	if (ses->se_target_maxslots > from)
> -		ses->se_target_maxslots = from;
> +	if (ses->se_target_maxslots > from) {
> +		int new_target = from ?: 1;
> +		atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
> +		ses->se_target_maxslots = new_target;
> +	}
>  }
>  
> -static int __maybe_unused
> +static int
>  reduce_session_slots(struct nfsd4_session *ses, int dec)
>  {
>  	struct nfsd_net *nn = net_generic(ses->se_client->net,
> @@ -1948,6 +1961,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec)
>  		return ret;
>  	ret = min(dec, ses->se_target_maxslots-1);
>  	ses->se_target_maxslots -= ret;
> +	atomic_sub(ret, &nfsd_total_target_slots);
>  	ses->se_slot_gen += 1;
>  	if (ses->se_slot_gen == 0) {
>  		int i;
> @@ -2006,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>  	new->se_target_maxslots = i;
> +	atomic_add(i - 1, &nfsd_total_target_slots);
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2130,6 +2145,36 @@ static void free_session(struct nfsd4_session *ses)
>  	__free_session(ses);
>  }
>  
> +static unsigned long
> +nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
> +{
> +	unsigned long cnt = atomic_read(&nfsd_total_target_slots);
> +
> +	return cnt ? cnt : SHRINK_EMPTY;
> +}
> +
> +static unsigned long
> +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
> +{
> +	struct nfsd4_session *ses;
> +	unsigned long scanned = 0;
> +	unsigned long freed = 0;
> +
> +	spin_lock(&nfsd_session_list_lock);
> +	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
> +		freed += reduce_session_slots(ses, 1);
> +		scanned += 1;
> +		if (scanned >= sc->nr_to_scan) {
> +			/* Move starting point for next scan */
> +			list_move(&nfsd_session_list, &ses->se_all_sessions);
> +			break;
> +		}
> +	}
> +	spin_unlock(&nfsd_session_list_lock);
> +	sc->nr_scanned = scanned;
> +	return freed;
> +}
> +
>  static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
>  {
>  	int idx;
> @@ -2154,6 +2199,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
>  	list_add(&new->se_perclnt, &clp->cl_sessions);
>  	spin_unlock(&clp->cl_lock);
>  
> +	spin_lock(&nfsd_session_list_lock);
> +	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
> +	spin_unlock(&nfsd_session_list_lock);
> +
>  	{
>  		struct sockaddr *sa = svc_addr(rqstp);
>  		/*
> @@ -2223,6 +2272,9 @@ unhash_session(struct nfsd4_session *ses)
>  	spin_lock(&ses->se_client->cl_lock);
>  	list_del(&ses->se_perclnt);
>  	spin_unlock(&ses->se_client->cl_lock);
> +	spin_lock(&nfsd_session_list_lock);
> +	list_del(&ses->se_all_sessions);
> +	spin_unlock(&nfsd_session_list_lock);
>  }
>  
>  /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
> @@ -4335,6 +4387,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	slot->sl_seqid = seq->seqid;
>  	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
> +	slot->sl_generation = session->se_slot_gen;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>  	else
> @@ -4371,6 +4424,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
>  						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> +			atomic_add(session->se_fchannel.maxreqs - session->se_target_maxslots,
> +				   &nfsd_total_target_slots);
>  			session->se_target_maxslots = session->se_fchannel.maxreqs;
>  		} else {
>  			kfree(slot);
> @@ -8779,7 +8834,6 @@ nfs4_state_start_net(struct net *net)
>  }
>  
>  /* initialization to perform when the nfsd service is started: */
> -
>  int
>  nfs4_state_start(void)
>  {
> @@ -8789,6 +8843,15 @@ nfs4_state_start(void)
>  	if (ret)
>  		return ret;
>  
> +	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
> +	if (!nfsd_slot_shrinker) {
> +		rhltable_destroy(&nfs4_file_rhltable);
> +		return -ENOMEM;
> +	}
> +	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
> +	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
> +	shrinker_register(nfsd_slot_shrinker);
> +
>  	set_max_delegations();
>  	return 0;
>  }
> @@ -8830,6 +8893,7 @@ void
>  nfs4_state_shutdown(void)
>  {
>  	rhltable_destroy(&nfs4_file_rhltable);
> +	shrinker_free(nfsd_slot_shrinker);
>  }
>  
>  static void
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index ea6659d52be2..0e320ba097f2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -345,6 +345,7 @@ struct nfsd4_session {
>  	bool			se_dead;
>  	struct list_head	se_hash;	/* hash by sessionid */
>  	struct list_head	se_perclnt;
> +	struct list_head	se_all_sessions;/* global list of sessions */

I think my only minor issue here is whether we truly want an
"all_sessions" list. Since we don't expect the shrinker to run very
often, isn't there another mechanism that can already iterate all
clients and their sessions?


>  	struct nfs4_client	*se_client;
>  	struct nfs4_sessionid	se_sessionid;
>  	struct nfsd4_channel_attrs se_fchannel;
> -- 
> 2.47.0
> 

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-11-19  0:41 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
  2024-11-19 19:20   ` Chuck Lever
@ 2024-11-19 19:34   ` Jeff Layton
  1 sibling, 0 replies; 47+ messages in thread
From: Jeff Layton @ 2024-11-19 19:34 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> If a client ever uses the highest available slot for a given session,
> attempt to allocate another slot so there is room for the client to use
> more slots if wanted.  GFP_NOWAIT is used so if there is not plenty of
> free memory, failure is expected - which is what we want.  It also
> allows the allocation while holding a spinlock.
> 
> We would expect to stabilise with one more slot available than the client
> actually uses.
> 
> Now that we grow the slot table on demand we can start with a smaller
> allocation.  Define NFSD_MAX_INITIAL_SLOTS and allocate at most that
> many when session is created.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 32 ++++++++++++++++++++++++++------
>  fs/nfsd/state.h     |  2 ++
>  2 files changed, 28 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 31ff9f92a895..fb522165b376 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1956,7 +1956,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
>  		goto out_free;
>  
> -	for (i = 1; i < numslots; i++) {
> +	for (i = 1; i < numslots && i < NFSD_MAX_INITIAL_SLOTS; i++) {

nit: maybe just clamp numslots at NFSD_MAX_INITIAL_SLOTS?

>  		slot = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
>  		if (!slot)
>  			break;
> @@ -4248,11 +4248,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	slot = xa_load(&session->se_slots, seq->slotid);
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
> -	/* We do not negotiate the number of slots yet, so set the
> -	 * maxslots to the session maxreqs which is used to encode
> -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> -	seq->maxslots = session->se_fchannel.maxreqs;
> -
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>  	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>  					slot->sl_flags & NFSD4_SLOT_INUSE);
> @@ -4302,6 +4297,31 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	cstate->session = session;
>  	cstate->clp = clp;
>  
> +	/*
> +	 * If the client ever uses the highest available slot,
> +	 * gently try to allocate another one.
> +	 */
> +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> +		int s = session->se_fchannel.maxreqs;
> +
> +		/*
> +		 * GFP_NOWAIT is a low-priority non-blocking allocation
> +		 * which can be used under client_lock and only succeeds
> +		 * if there is plenty of memory.
> +		 * Use GFP_ATOMIC which is higher priority for xa_store()
> +		 * so we are less likely to waste the effort of the first
> +		 * allocation.
> +		 */
> +		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> +						GFP_ATOMIC)))
> +			session->se_fchannel.maxreqs += 1;
> +		else
> +			kfree(slot);
> +	}
> +	seq->maxslots = session->se_fchannel.maxreqs;
> +
>  out:
>  	switch (clp->cl_cb_state) {
>  	case NFSD4_CB_DOWN:
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index e97626916a68..a14a823670e9 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -249,6 +249,8 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
>   * get good throughput on high-latency servers.
>   */
>  #define NFSD_MAX_SLOTS_PER_SESSION	2048
> +/* Maximum number of slots per session to allocate for CREATE_SESSION */
> +#define NFSD_MAX_INITIAL_SLOTS		32
>  /* Maximum  session per slot cache size */
>  #define NFSD_SLOT_CACHE_SIZE		2048
>  /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
  2024-11-19 19:25   ` Chuck Lever
@ 2024-11-19 19:48   ` Jeff Layton
  1 sibling, 0 replies; 47+ messages in thread
From: Jeff Layton @ 2024-11-19 19:48 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than the limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactivate a slot we accept with the next seqid in
> sequence, or 1.
> 

Personally, I think that resetting to 1 is the only sane choice. After
shrinking the slot table, either side is free to forget the slot
information. When the slot is resurrected, we need to treat it as a new
slot. Expecting the server to remember all seqids, and their cached
replies for all slots ever used on a session seems like an open-ended
mandate.

That said, I'm ok with the server being accepting here, in case there
are client implementations that have done it the other way. Some clear
guidance from the RFCs would sure be nice though.

> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 76 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fb522165b376..0625b0aec6b8 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> +			 GFP_ATOMIC);
>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1967,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2080,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -3687,10 +3726,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3699,6 +3738,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4249,8 +4290,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4275,6 +4315,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4285,8 +4331,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> @@ -4302,8 +4349,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * gently try to allocate another one.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
> +		void *prev_slot;
>  
>  		/*
>  		 * GFP_NOWAIT is a low-priority non-blocking allocation
> @@ -4314,13 +4363,21 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		 * allocation.
>  		 */
>  		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> +		prev_slot = xa_load(&session->se_slots, s);
> +		if (xa_is_value(prev_slot) && slot) {
> +			slot->sl_seqid = xa_to_value(prev_slot);
> +			slot->sl_flags |= NFSD4_SLOT_REUSED;
> +		}
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> -						GFP_ATOMIC)))
> +						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> -		else
> +			session->se_target_maxslots = session->se_fchannel.maxreqs;
> +		} else {
>  			kfree(slot);
> +		}
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 5c79494bd20b..b281a2198ff3 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1905,7 +1905,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -5054,7 +5055,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index a14a823670e9..ea6659d52be2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -268,7 +268,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -350,6 +352,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session
  2024-11-19  0:41 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
  2024-11-19 19:28   ` Chuck Lever
@ 2024-11-19 21:17   ` Jeff Layton
  2024-11-19 22:47     ` NeilBrown
  1 sibling, 1 reply; 47+ messages in thread
From: Jeff Layton @ 2024-11-19 21:17 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> Add a shrinker which frees unused slots and may ask the clients to use
> fewer slots on each session.
> 
> Each session now tracks se_client_maxreqs which is the most recent
> max-requests-in-use reported by the client, and se_target_maxreqs which
> is a target number of requests which is reduced by the shrinker.
> 
> The shrinker iterates over all sessions on all clients in all
> net-namespaces and reduces the target by 1 for each.  The shrinker may
> get called multiple times to reduce by more than 1 each.
> 
> If se_target_maxreqs is above se_client_maxreqs, those slots can be
> freed immediately.  If not, the client will be asked to reduce its usage
> and as the usage goes down slots will be freed.
> 
> Once the usage has dropped to match the target, the target can be
> increased if the client uses all available slots and if a GFP_NOWAIT
> allocation succeeds.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 72 ++++++++++++++++++++++++++++++++++++++++++---
>  fs/nfsd/state.h     |  1 +
>  2 files changed, 69 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 0625b0aec6b8..ac49c3bd0dcb 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
>   */
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
> +static struct shrinker *nfsd_slot_shrinker;
> +static DEFINE_SPINLOCK(nfsd_session_list_lock);
> +static LIST_HEAD(nfsd_session_list);
> +/* The sum of "target_slots-1" on every session.  The shrinker can push this
> + * down, though it can take a little while for the memory to actually
> + * be freed.  The "-1" is because we can never free slot 0 while the
> + * session is active.
> + */
> +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
> +
>  static void
>  free_session_slots(struct nfsd4_session *ses, int from)
>  {
> @@ -1931,11 +1941,14 @@ free_session_slots(struct nfsd4_session *ses, int from)
>  		kfree(slot);
>  	}
>  	ses->se_fchannel.maxreqs = from;
> -	if (ses->se_target_maxslots > from)
> -		ses->se_target_maxslots = from;
> +	if (ses->se_target_maxslots > from) {
> +		int new_target = from ?: 1;

Let's make that "from ? from : 1". The above is a non-standard gcc-ism
(AIUI).

> +		atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots);
> +		ses->se_target_maxslots = new_target;
> +	}
>  }
>  
> -static int __maybe_unused
> +static int
>  reduce_session_slots(struct nfsd4_session *ses, int dec)
>  {
>  	struct nfsd_net *nn = net_generic(ses->se_client->net,
> @@ -1948,6 +1961,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec)
>  		return ret;
>  	ret = min(dec, ses->se_target_maxslots-1);
>  	ses->se_target_maxslots -= ret;
> +	atomic_sub(ret, &nfsd_total_target_slots);
>  	ses->se_slot_gen += 1;
>  	if (ses->se_slot_gen == 0) {
>  		int i;
> @@ -2006,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>  	new->se_target_maxslots = i;
> +	atomic_add(i - 1, &nfsd_total_target_slots);
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2130,6 +2145,36 @@ static void free_session(struct nfsd4_session *ses)
>  	__free_session(ses);
>  }
>  
> +static unsigned long
> +nfsd_slot_count(struct shrinker *s, struct shrink_control *sc)
> +{
> +	unsigned long cnt = atomic_read(&nfsd_total_target_slots);
> +
> +	return cnt ? cnt : SHRINK_EMPTY;
> +}
> +
> +static unsigned long
> +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc)
> +{
> +	struct nfsd4_session *ses;
> +	unsigned long scanned = 0;
> +	unsigned long freed = 0;
> +
> +	spin_lock(&nfsd_session_list_lock);
> +	list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) {
> +		freed += reduce_session_slots(ses, 1);
> +		scanned += 1;
> +		if (scanned >= sc->nr_to_scan) {
> +			/* Move starting point for next scan */
> +			list_move(&nfsd_session_list, &ses->se_all_sessions);
> +			break;
> +		}
> +	}
> +	spin_unlock(&nfsd_session_list_lock);
> +	sc->nr_scanned = scanned;
> +	return freed;
> +}
> +
>  static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
>  {
>  	int idx;
> @@ -2154,6 +2199,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
>  	list_add(&new->se_perclnt, &clp->cl_sessions);
>  	spin_unlock(&clp->cl_lock);
>  
> +	spin_lock(&nfsd_session_list_lock);
> +	list_add_tail(&new->se_all_sessions, &nfsd_session_list);
> +	spin_unlock(&nfsd_session_list_lock);
> +
>  	{
>  		struct sockaddr *sa = svc_addr(rqstp);
>  		/*
> @@ -2223,6 +2272,9 @@ unhash_session(struct nfsd4_session *ses)
>  	spin_lock(&ses->se_client->cl_lock);
>  	list_del(&ses->se_perclnt);
>  	spin_unlock(&ses->se_client->cl_lock);
> +	spin_lock(&nfsd_session_list_lock);
> +	list_del(&ses->se_all_sessions);
> +	spin_unlock(&nfsd_session_list_lock);
>  }
>  
>  /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
> @@ -4335,6 +4387,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	slot->sl_seqid = seq->seqid;
>  	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
> +	slot->sl_generation = session->se_slot_gen;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>  	else
> @@ -4371,6 +4424,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
>  						GFP_ATOMIC))) {
>  			session->se_fchannel.maxreqs += 1;
> +			atomic_add(session->se_fchannel.maxreqs - session->se_target_maxslots,
> +				   &nfsd_total_target_slots);
>  			session->se_target_maxslots = session->se_fchannel.maxreqs;
>  		} else {
>  			kfree(slot);
> @@ -8779,7 +8834,6 @@ nfs4_state_start_net(struct net *net)
>  }
>  
>  /* initialization to perform when the nfsd service is started: */
> -
>  int
>  nfs4_state_start(void)
>  {
> @@ -8789,6 +8843,15 @@ nfs4_state_start(void)
>  	if (ret)
>  		return ret;
>  
> +	nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot");
> +	if (!nfsd_slot_shrinker) {
> +		rhltable_destroy(&nfs4_file_rhltable);
> +		return -ENOMEM;
> +	}
> +	nfsd_slot_shrinker->count_objects = nfsd_slot_count;
> +	nfsd_slot_shrinker->scan_objects = nfsd_slot_scan;
> +	shrinker_register(nfsd_slot_shrinker);
> +
>  	set_max_delegations();
>  	return 0;
>  }
> @@ -8830,6 +8893,7 @@ void
>  nfs4_state_shutdown(void)
>  {
>  	rhltable_destroy(&nfs4_file_rhltable);
> +	shrinker_free(nfsd_slot_shrinker);
>  }
>  
>  static void
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index ea6659d52be2..0e320ba097f2 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -345,6 +345,7 @@ struct nfsd4_session {
>  	bool			se_dead;
>  	struct list_head	se_hash;	/* hash by sessionid */
>  	struct list_head	se_perclnt;
> +	struct list_head	se_all_sessions;/* global list of sessions */
>  	struct nfs4_client	*se_client;
>  	struct nfs4_sessionid	se_sessionid;
>  	struct nfsd4_channel_attrs se_fchannel;

-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand
  2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
                   ` (5 preceding siblings ...)
  2024-11-19  0:41 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
@ 2024-11-19 21:31 ` Jeff Layton
  2024-11-19 22:52   ` NeilBrown
  6 siblings, 1 reply; 47+ messages in thread
From: Jeff Layton @ 2024-11-19 21:31 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> Here is v2 of my series for on-demand allocation and freeing of session DRC slots.
> 
> - Now uses an xarray to store slots, and the limit is raised to 2048
> - delays retiring a slot until the client has confirmed that it isn't
>   using it as described in RFC:
> 
>       The replier SHOULD retain the slots it wants to retire until the
>       requester sends a request with a highest_slotid less than or equal
>       to the replier's new enforced highest_slotid.
> 
> - When a retired slot is used, allow the seqid to be the next in sequence
>   as required by the RFC:
> 
>          Each time a slot is reused, the request MUST specify a sequence
>          ID that is one greater than that of the previous request on the
>          slot.
>
>   or "1" as (arguably) allowed by the RFC:
> 
>          The first time a slot is used, the requester MUST specify a
>          sequence ID of one
> 

I thought that the conclusion of the IETF discussion was that we should
reset this to 1. It'd be ideal to just do that, as then we wouldn't
need NFSD4_SLOT_REUSED.

Are there any clients that expect to reuse the old seqid in this
situation? I know the Linux client doesn't. Do Solaris or FreeBSD?

> - current slot allocation is now reported in /proc/fs/nfsd/clients/*/info
> 
> This has been tested with highly aggressive shrinker settings:
> 	nfsd_slot_shrinker->seeks = 0;
> 	nfsd_slot_shrinker->batch = 2;
> 
> and with periodic "echo 3 > drop_caches".  The slot count drops as
> expected and then increases again.
> 

This is really great work, Neil!
--
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19 19:14   ` Chuck Lever
@ 2024-11-19 22:22     ` NeilBrown
  2024-11-20  0:21       ` Chuck Lever
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:22 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> > Each client now reports the number of slots allocated in each session.
> 
> Can this file also report the target slot count? Ie, is the server
> matching the client's requested slot count, or is it over or under
> by some number?

I could.  Would you like to suggest a syntax?
Usually the numbers would be the same except for short transition
periods, so I'm not convinced of the value.

Currently if the target is reduced while the client is idle there can be
a longer delay before the slots are actually freed, but I think 2
lease-renewal SEQUENCE ops would do it.  If/when we add use of the
CB_RECALL_SLOT callback the delay should disappear.

> 
> Would it be useful for a server tester or administrator to poke a
> target slot count value into this file and watch the machinery
> adjust?

Maybe.  But echo 3 > drop_caches does a pretty good job.  I don't see
that we need more.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19 19:21   ` Chuck Lever
@ 2024-11-19 22:24     ` NeilBrown
  2024-11-20  0:25       ` Chuck Lever
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:24 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> > Each client now reports the number of slots allocated in each session.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 8 ++++++++
> >  1 file changed, 8 insertions(+)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 3889ba1c653f..31ff9f92a895 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
> >  static int client_info_show(struct seq_file *m, void *v)
> >  {
> >  	struct inode *inode = file_inode(m->file);
> > +	struct nfsd4_session *ses;
> >  	struct nfs4_client *clp;
> >  	u64 clid;
> >  
> > @@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
> >  	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
> >  	seq_printf(m, "admin-revoked states: %d\n",
> >  		   atomic_read(&clp->cl_admin_revoked));
> > +	seq_printf(m, "session slots:");
> > +	spin_lock(&clp->cl_lock);
> > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > +		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > +	spin_unlock(&clp->cl_lock);
> > +	seq_puts(m, "\n");
> > +
> 
> Also, I wonder if information about the backchannel session can be
> surfaced in this way?
> 

Probably makes sense.  Maybe we should invent a syntax for reporting
arbitrary info about each session.

   session %d slots: %d
   session %d cb-slots: %d
   ...

???

NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-11-19 19:20   ` Chuck Lever
@ 2024-11-19 22:27     ` NeilBrown
  2024-11-20  0:32       ` Chuck Lever
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:27 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:31AM +1100, NeilBrown wrote:
> > If a client ever uses the highest available slot for a given session,
> > attempt to allocate another slot so there is room for the client to use
> > more slots if wanted.  GFP_NOWAIT is used so if there is not plenty of
> > free memory, failure is expected - which is what we want.  It also
> > allows the allocation while holding a spinlock.
> > 
> > We would expect to stablise with one more slot available than the client
> > actually uses.
> 
> Which begs the question "why have a 2048 slot maximum session slot
> table size?" 1025 might work too. But is there a need for any
> maximum at all, or is this just a sanity check?

Linux NFS presumably isn't the only client, and it might change in the
future.  Maybe there is no need for a maximum.  It was mostly as a
sanity check.

It wouldn't take much to convince me to remove the limit.

> 
> 
> > Now that we grow the slot table on demand we can start with a smaller
> > allocation.  Define NFSD_MAX_INITIAL_SLOTS and allocate at most that
> > many when session is created.
> 
> Maybe NFSD_DEFAULT_INITIAL_SLOTS is more descriptive?

I don't think "DEFAULT" is the right word.  The client requests a number
of slots.  That is the "Default".  The server can impose a limit - a
maximum.
Maybe we don't need a limit here either?

Thanks,
NeilBrown


> 
> 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 32 ++++++++++++++++++++++++++------
> >  fs/nfsd/state.h     |  2 ++
> >  2 files changed, 28 insertions(+), 6 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 31ff9f92a895..fb522165b376 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1956,7 +1956,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >  	if (!slot || xa_is_err(xa_store(&new->se_slots, 0, slot, GFP_KERNEL)))
> >  		goto out_free;
> >  
> > -	for (i = 1; i < numslots; i++) {
> > +	for (i = 1; i < numslots && i < NFSD_MAX_INITIAL_SLOTS; i++) {
> >  		slot = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
> >  		if (!slot)
> >  			break;
> > @@ -4248,11 +4248,6 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	slot = xa_load(&session->se_slots, seq->slotid);
> >  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >  
> > -	/* We do not negotiate the number of slots yet, so set the
> > -	 * maxslots to the session maxreqs which is used to encode
> > -	 * sr_highest_slotid and the sr_target_slot id to maxslots */
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > -
> >  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> >  	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> >  					slot->sl_flags & NFSD4_SLOT_INUSE);
> > @@ -4302,6 +4297,31 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >  	cstate->session = session;
> >  	cstate->clp = clp;
> >  
> > +	/*
> > +	 * If the client ever uses the highest available slot,
> > +	 * gently try to allocate another one.
> > +	 */
> > +	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> > +		int s = session->se_fchannel.maxreqs;
> > +
> > +		/*
> > +		 * GFP_NOWAIT is a low-priority non-blocking allocation
> > +		 * which can be used under client_lock and only succeeds
> > +		 * if there is plenty of memory.
> > +		 * Use GFP_ATOMIC which is higher priority for xa_store()
> > +		 * so we are less likely to waste the effort of the first
> > +		 * allocation.
> 
> IIUC, GFP_ATOMIC allocations come from a special pool. I don't think
> we want that here. I'd rather stick with NORETRY or KERNEL.
> 
> 
> > +		 */
> > +		slot = kzalloc(slot_bytes(&session->se_fchannel), GFP_NOWAIT);
> > +		if (slot && !xa_is_err(xa_store(&session->se_slots, s, slot,
> > +						GFP_ATOMIC)))
> > +			session->se_fchannel.maxreqs += 1;
> > +		else
> > +			kfree(slot);
> > +	}
> > +	seq->maxslots = session->se_fchannel.maxreqs;
> > +
> >  out:
> >  	switch (clp->cl_cb_state) {
> >  	case NFSD4_CB_DOWN:
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index e97626916a68..a14a823670e9 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -249,6 +249,8 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
> >   * get good throughput on high-latency servers.
> >   */
> >  #define NFSD_MAX_SLOTS_PER_SESSION	2048
> > +/* Maximum number of slots per session to allocate for CREATE_SESSION */
> > +#define NFSD_MAX_INITIAL_SLOTS		32
> 
> The first couple of patches did so nicely at ruthlessly discarding a
> lot of arbitrary logic. I'm not convinced by the patch description
> that the INITIAL_SLOTS complexity is needed...
> 
> 
> >  /* Maximum  session per slot cache size */
> >  #define NFSD_SLOT_CACHE_SIZE		2048
> >  /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
> > -- 
> > 2.47.0
> > 
> 
> -- 
> Chuck Lever
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19 19:25   ` Chuck Lever
@ 2024-11-19 22:35     ` NeilBrown
  2024-11-20  1:27       ` Chuck Lever
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:35 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than we limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactive a slot we accept with the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> >  fs/nfsd/nfs4xdr.c   |  5 +--
> >  fs/nfsd/state.h     |  4 +++
> >  fs/nfsd/xdr4.h      |  2 --
> >  4 files changed, 76 insertions(+), 16 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fb522165b376..0625b0aec6b8 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >  
> >  static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >  {
> >  	int i;
> >  
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >  
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > +			 GFP_ATOMIC);
> 
> Again... ATOMIC is probably not what we want here, even if it is
> only documentary.

Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
a warning.

> 
> And, I thought we determined that an unretired slot had a sequence
> number that is reset. Why save the slot's seqid? If I'm missing
> something, the comment here should be bolstered to explain it.

It isn't clear to me that we determined that - only that some people
asserted it.  Until the spec is clarified I think it is safest to be
cautious.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session
  2024-11-19 19:28   ` Chuck Lever
@ 2024-11-19 22:41     ` NeilBrown
  0 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:41 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Tue, Nov 19, 2024 at 11:41:33AM +1100, NeilBrown wrote:
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index ea6659d52be2..0e320ba097f2 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -345,6 +345,7 @@ struct nfsd4_session {
> >  	bool			se_dead;
> >  	struct list_head	se_hash;	/* hash by sessionid */
> >  	struct list_head	se_perclnt;
> > +	struct list_head	se_all_sessions;/* global list of sessions */
> 
> I think my only minor issue here is whether we truly want an
> "all_sessions" list. Since we don't expect the shrinker to run very
> often, isn't there another mechanism that can already iterate all
> clients and their sessions?

"all_sessions" certainly isn't my favourite part of the set.
But I do think we need it.

We can iterate all sessions by iterating all net-namespaces, then all
clients, then all sessions.  But that isn't what we need.

The shrinker mechanism seems to assume an LRU.  It makes "scan" requests
one "batch" at a time, and may request several batches in sequence
without telling you in advance how many batches to expect.  So you need
some concept of the "next" thing to free.  Often this is the end of the
LRU.
But we don't have an LRU because the slots aren't a cache.

An important detail is that when nfsd_slot_scan() has scanned all that
it was asked, it moves the head to the current point in the list.  So
the next time it is called it will start with the correct next session.

This will only become important when there are more than 64 (default
batch size) sessions.

NeilBrown

> 
> 
> >  	struct nfs4_client	*se_client;
> >  	struct nfs4_sessionid	se_sessionid;
> >  	struct nfsd4_channel_attrs se_fchannel;
> > -- 
> > 2.47.0
> > 
> 
> -- 
> Chuck Lever
> 

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session
  2024-11-19 21:17   ` Jeff Layton
@ 2024-11-19 22:47     ` NeilBrown
  0 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:47 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Jeff Layton wrote:
> On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> > Add a shrinker which frees unused slots and may ask the clients to use
> > fewer slots on each session.
> > 
> > Each session now tracks se_client_maxreqs which is the most recent
> > max-requests-in-use reported by the client, and se_target_maxreqs which
> > is a target number of requests which is reduced by the shrinker.
> > 
> > The shrinker iterates over all sessions on all client in all
> > net-namespaces and reduces the target by 1 for each.  The shrinker may
> > get called multiple times to reduce by more than 1 each.
> > 
> > If se_target_maxreqs is above se_client_maxreqs, those slots can be
> > freed immediately.  If not the client will be ask to reduce its usage
> > and as the usage goes down slots will be freed.
> > 
> > Once the usage has dropped to match the target, the target can be
> > increased if the client uses all available slots and if a GFP_NOWAIT
> > allocation succeeds.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/nfsd/nfs4state.c | 72 ++++++++++++++++++++++++++++++++++++++++++---
> >  fs/nfsd/state.h     |  1 +
> >  2 files changed, 69 insertions(+), 4 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 0625b0aec6b8..ac49c3bd0dcb 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses)
> >   */
> >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >  
> > +static struct shrinker *nfsd_slot_shrinker;
> > +static DEFINE_SPINLOCK(nfsd_session_list_lock);
> > +static LIST_HEAD(nfsd_session_list);
> > +/* The sum of "target_slots-1" on every session.  The shrinker can push this
> > + * down, though it can take a little while for the memory to actually
> > + * be freed.  The "-1" is because we can never free slot 0 while the
> > + * session is active.
> > + */
> > +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0);
> > +
> >  static void
> >  free_session_slots(struct nfsd4_session *ses, int from)
> >  {
> > @@ -1931,11 +1941,14 @@ free_session_slots(struct nfsd4_session *ses, int from)
> >  		kfree(slot);
> >  	}
> >  	ses->se_fchannel.maxreqs = from;
> > -	if (ses->se_target_maxslots > from)
> > -		ses->se_target_maxslots = from;
> > +	if (ses->se_target_maxslots > from) {
> > +		int new_target = from ?: 1;
> 
> Let's make that "from ? from : 1". The above is a non-standard gcc-ism
> (AIUI).

Let's not.  There are currently 1926 lines in .c and .h files in the
Linux kernel which contain "?:" and another 848 which contain "? :".
I think it is an established part of the kernel style.

This is admittedly dominated by bcachefs, but there is a long tail with
tools, net, crypto, drivers all contributing.  Outside of bcachefs, fs/
contributes only 102 in 29 different filesystems.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand
  2024-11-19 21:31 ` [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand Jeff Layton
@ 2024-11-19 22:52   ` NeilBrown
  0 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-19 22:52 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Jeff Layton wrote:
> On Tue, 2024-11-19 at 11:41 +1100, NeilBrown wrote:
> > Here is v2 of my series for on-demand allocation and freeing of session DRC slots.
> > 
> > - Now uses an xarray to store slots, and the limit is raised to 2048
> > - delays retiring a slot until the client has confirmed that it isn't
> >   using it as described in RFC:
> > 
> >       The replier SHOULD retain the slots it wants to retire until the
> >       requester sends a request with a highest_slotid less than or equal
> >       to the replier's new enforced highest_slotid.
> > 
> > - When a retired slot is used, allow the seqid to be the next in sequence
> >   as required by the RFC:
> > 
> >          Each time a slot is reused, the request MUST specify a sequence
> >          ID that is one greater than that of the previous request on the
> >          slot.
> >
> >   or "1" as (arguably) allowed by the RFC:
> > 
> >          The first time a slot is used, the requester MUST specify a
> >          sequence ID of one
> > 
> 
> I thought that the conclusion of the IETF discussion was that we should
> reset this to 1. It'd be ideal to just do that, as then we wouldn't
> need NFSD4_SLOT_REUSED.

I thought the conclusion was:

  I'm convinced.  The next draft of rfc5661bis will address this issue.

Until the issue is addressed I don't think it would be wise to preempt
the result.

> 
> Are there any clients that expect to reuse the old seqid in this
> situation? I know the Linux client doesn't. Do Solaris or FreeBSD?

I don't know.  But I tend to code to the spec, not to other clients.
I still think the spec says

         Each time a slot is reused, the request MUST specify a sequence
         ID that is one greater than that of the previous request on the
         slot.

and I don't see any good reason not to treat what we are doing here as
"reuse". 

So I'm making a concession to the linux client and to you by allowing
'1'.  I'm still not convinced that it is a good idea.  At best I'm
convinced that the spec can be read to suggest that might be an option.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19 22:22     ` NeilBrown
@ 2024-11-20  0:21       ` Chuck Lever
  0 siblings, 0 replies; 47+ messages in thread
From: Chuck Lever @ 2024-11-20  0:21 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, Nov 20, 2024 at 09:22:59AM +1100, NeilBrown wrote:
> On Wed, 20 Nov 2024, Chuck Lever wrote:
> > On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> > > Each client now reports the number of slots allocated in each session.
> > 
> > Can this file also report the target slot count? Ie, is the server
> > matching the client's requested slot count, or is it over or under
> > by some number?
> 
> I could.  Would you like to suggest a syntax?
> Usually the numbers would be the same except for short transition
> periods, so I'm not convinced of the value.

That's precisely the kind of situation I would like to be able to
catch -- the two are unequal longer than expected.


> Currently if the target is reduced while the client is idle there can be
> a longer delay before the slots are actually freed, but I think 2
> lease-renewal SEQUENCE ops would do it.  If/when we add use of the
> CB_RECALL_SLOT callback the delay should disappear.
> 
> > Would it be useful for a server tester or administrator to poke a
> > target slot count value into this file and watch the machinery
> > adjust?
> 
> Maybe.  By echo 3 > drop_caches does a pretty good job.  I don't see
> that we need more.

Fair enough.

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-19 22:24     ` NeilBrown
@ 2024-11-20  0:25       ` Chuck Lever
  2024-11-21 21:03         ` NeilBrown
  0 siblings, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-20  0:25 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, Nov 20, 2024 at 09:24:52AM +1100, NeilBrown wrote:
> On Wed, 20 Nov 2024, Chuck Lever wrote:
> > On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> > > Each client now reports the number of slots allocated in each session.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/nfsd/nfs4state.c | 8 ++++++++
> > >  1 file changed, 8 insertions(+)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index 3889ba1c653f..31ff9f92a895 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
> > >  static int client_info_show(struct seq_file *m, void *v)
> > >  {
> > >  	struct inode *inode = file_inode(m->file);
> > > +	struct nfsd4_session *ses;
> > >  	struct nfs4_client *clp;
> > >  	u64 clid;
> > >  
> > > @@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
> > >  	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
> > >  	seq_printf(m, "admin-revoked states: %d\n",
> > >  		   atomic_read(&clp->cl_admin_revoked));
> > > +	seq_printf(m, "session slots:");
> > > +	spin_lock(&clp->cl_lock);
> > > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > > +		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > > +	spin_unlock(&clp->cl_lock);
> > > +	seq_puts(m, "\n");
> > > +
> > 
> > Also, I wonder if information about the backchannel session can be
> > surfaced in this way?
> > 
> 
> Probably make sense.  Maybe we should invent a syntax for reporting
> arbitrary info about each session.
> 
>    session %d slots: %d
>    session %d cb-slots: %d
>    ...
> 
> ???

If each client has a directory, then it should have a subdirectory
called "sessions". Each subdirectory of "sessions" should be one
session, named by its hex session ID (as it is presented by
Wireshark). Each session directory could have a file for the forward
channel, one for the backchannel, and maybe one for generic
information like when the session was created and how many
connections it has.

We don't need all of that in this patch set, but whatever is
introduced here should be extensible to allow us to add more over
time.

-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-11-19 22:27     ` NeilBrown
@ 2024-11-20  0:32       ` Chuck Lever
  2024-11-21 21:20         ` NeilBrown
  0 siblings, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-20  0:32 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, Nov 20, 2024 at 09:27:51AM +1100, NeilBrown wrote:
> On Wed, 20 Nov 2024, Chuck Lever wrote:
> > On Tue, Nov 19, 2024 at 11:41:31AM +1100, NeilBrown wrote:
> > > If a client ever uses the highest available slot for a given session,
> > > attempt to allocate another slot so there is room for the client to use
> > > more slots if wanted.  GFP_NOWAIT is used so if there is not plenty of
> > > free memory, failure is expected - which is what we want.  It also
> > > allows the allocation while holding a spinlock.
> > > 
> > > We would expect to stablise with one more slot available than the client
> > > actually uses.
> > 
> > Which begs the question "why have a 2048 slot maximum session slot
> > table size?" 1025 might work too. But is there a need for any
> > maximum at all, or is this just a sanity check?
> 
> Linux NFS presumably isn't the only client, and it might change in the
> future.  Maybe there is no need for a maximum.  It was mostly as a
> sanity check.
> 
> It wouldn't take much to convince me to remove the limit.

What's the worst that might happen if there is no cap? Can this be
used as a DoS vector?

If a maximum should be necessary, its value should be clearly
labeled as "not an architectural limit -- for sanity checking only".


> > > Now that we grow the slot table on demand we can start with a smaller
> > > allocation.  Define NFSD_MAX_INITIAL_SLOTS and allocate at most that
> > > many when session is created.
> > 
> > Maybe NFSD_DEFAULT_INITIAL_SLOTS is more descriptive?
> 
> I don't think "DEFAULT" is the right word.  The client requests a number
> of slots.  That is the "Default".  The server can impose a limit - a
> maximum.
> Maybe we don't need a limit here either?

I see. Well I don't think there needs to be a "maximum" number of
initial slots. NFSD can try to allocate the number the client
requested as best it can, until it hits our sane maximum above.

I think sessions should have a minimum number of slots to guarantee
forward progress (or IOW prevent a deadlock). I would say that
number should be larger than 1 -- perhaps 2 or even 4.

The problem with a small initial slot count is that it means the
session has a slow start heuristic. That might or might not be
desirable here.


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-19 22:35     ` NeilBrown
@ 2024-11-20  1:27       ` Chuck Lever
  2024-11-21 21:47         ` NeilBrown
  0 siblings, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2024-11-20  1:27 UTC (permalink / raw)
  To: NeilBrown; +Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> On Wed, 20 Nov 2024, Chuck Lever wrote:
> > On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > > Reducing the number of slots in the session slot table requires
> > > confirmation from the client.  This patch adds reduce_session_slots()
> > > which starts the process of getting confirmation, but never calls it.
> > > That will come in a later patch.
> > > 
> > > Before we can free a slot we need to confirm that the client won't try
> > > to use it again.  This involves returning a lower cr_maxrequests in a
> > > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > > is not larger than we limit we are trying to impose.  So for each slot
> > > we need to remember that we have sent a reduced cr_maxrequests.
> > > 
> > > To achieve this we introduce a concept of request "generations".  Each
> > > time we decide to reduce cr_maxrequests we increment the generation
> > > number, and record this when we return the lower cr_maxrequests to the
> > > client.  When a slot with the current generation reports a low
> > > ca_maxrequests, we commit to that level and free extra slots.
> > > 
> > > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > > we iterate all slots and reset the generation number to avoid false matches.
> > > 
> > > When we free a slot we store the seqid in the slot pointer so that it can
> > > be restored when we reactivate the slot.  The RFC can be read as
> > > suggesting that the slot number could restart from one after a slot is
> > > retired and reactivated, but also suggests that retiring slots is not
> > > required.  So when we reactive a slot we accept with the next seqid in
> > > sequence, or 1.
> > > 
> > > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > > matches how it is encoded for the reply.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> > >  fs/nfsd/nfs4xdr.c   |  5 +--
> > >  fs/nfsd/state.h     |  4 +++
> > >  fs/nfsd/xdr4.h      |  2 --
> > >  4 files changed, 76 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > index fb522165b376..0625b0aec6b8 100644
> > > --- a/fs/nfsd/nfs4state.c
> > > +++ b/fs/nfsd/nfs4state.c
> > > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> > >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> > >  
> > >  static void
> > > -free_session_slots(struct nfsd4_session *ses)
> > > +free_session_slots(struct nfsd4_session *ses, int from)
> > >  {
> > >  	int i;
> > >  
> > > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > > +	if (from >= ses->se_fchannel.maxreqs)
> > > +		return;
> > > +
> > > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> > >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > >  
> > > -		xa_erase(&ses->se_slots, i);
> > > +		/*
> > > +		 * Save the seqid in case we reactivate this slot.
> > > +		 * This will never require a memory allocation so GFP
> > > +		 * flag is irrelevant
> > > +		 */
> > > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > > +			 GFP_ATOMIC);
> > 
> > Again... ATOMIC is probably not what we want here, even if it is
> > only documentary.
> 
> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> a warning.

I find using GFP_ATOMIC here to be confusing -- it requests
allocation from special memory reserves and is to be used in
situations where allocation might result in system failure. That is
clearly not the case here, and the resulting memory allocation might
be long-lived.

I see the comment that says memory won't actually be allocated. I'm
not sure that's the way xa_store() works, however.

I don't immediately see another good choice, however. I can reach
out to Matthew and Liam and see if they have a better idea.


> > And, I thought we determined that an unretired slot had a sequence
> > number that is reset. Why save the slot's seqid? If I'm missing
> > something, the comment here should be bolstered to explain it.
> 
> It isn't clear to me that we determined that - only the some people
> asserted it.

From what I've read, everyone else who responded has said "use one".
And they have provided enough spec quotations that 1 seems like the
right initial slot sequence number value, always.

You should trust Tom Talpey's opinion on this. He was directly
involved 25 years ago when sessions were invented in DAFS and then
transferred into the NFSv4.1 protocol.


> Until the spec is clarified I think it is safest to be cautious.

The usual line we draw for adding code/features/complexity is the
proposer must demonstrate a use case for it. So far I have not seen
a client implementation that needs a server to remember the sequence
number in a slot that has been shrunken and then re-activated.

Will this dead slot be subject to being freed by the session
shrinker?

But the proposed implementation accepts 1 in this case, and it
doesn't seem tremendously difficult to remove the "remember the
seqid" mechanism once it has been codified to everyone's
satisfaction. So I won't belabor the point.


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-20  0:25       ` Chuck Lever
@ 2024-11-21 21:03         ` NeilBrown
  2024-11-21 21:24           ` Chuck Lever III
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-11-21 21:03 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Wed, Nov 20, 2024 at 09:24:52AM +1100, NeilBrown wrote:
> > On Wed, 20 Nov 2024, Chuck Lever wrote:
> > > On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
> > > > Each client now reports the number of slots allocated in each session.
> > > > 
> > > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > > ---
> > > >  fs/nfsd/nfs4state.c | 8 ++++++++
> > > >  1 file changed, 8 insertions(+)
> > > > 
> > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > index 3889ba1c653f..31ff9f92a895 100644
> > > > --- a/fs/nfsd/nfs4state.c
> > > > +++ b/fs/nfsd/nfs4state.c
> > > > @@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
> > > >  static int client_info_show(struct seq_file *m, void *v)
> > > >  {
> > > >  	struct inode *inode = file_inode(m->file);
> > > > +	struct nfsd4_session *ses;
> > > >  	struct nfs4_client *clp;
> > > >  	u64 clid;
> > > >  
> > > > @@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
> > > >  	seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
> > > >  	seq_printf(m, "admin-revoked states: %d\n",
> > > >  		   atomic_read(&clp->cl_admin_revoked));
> > > > +	seq_printf(m, "session slots:");
> > > > +	spin_lock(&clp->cl_lock);
> > > > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > > > +		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > > > +	spin_unlock(&clp->cl_lock);
> > > > +	seq_puts(m, "\n");
> > > > +
> > > 
> > > Also, I wonder if information about the backchannel session can be
> > > surfaced in this way?
> > > 
> > 
> > Probably make sense.  Maybe we should invent a syntax for reporting
> > arbitrary info about each session.
> > 
> >    session %d slots: %d
> >    session %d cb-slots: %d
> >    ...
> > 
> > ???
> 
> If each client has a directory, then it should have a subdirectory
> called "sessions". Each subdirectory of "sessions" should be one
> session, named by its hex session ID (as it is presented by
> Wireshark). Each session directory could have a file for the forward
> channel, one for the backchannel, and maybe one for generic
> information like when the session was created and how many
> connections it has.
> 
> We don't need all of that in this patch set, but whatever is
> introduced here should be extensible to allow us to add more over
> time.

I cannot say I'm excited about the proliferation of tiny files.  Your
suggestion isn't quite as bad as sysfs which claims to want one file per
value, but I think the sysfs approach provided more pain than gain and
you seem to be heading that way.  As evidence I present the rise of
netlink.  Netlink's main advantage is that it allows you to access a
collection of data in a single syscall (or maybe pair of syscalls).  If
we had a standard format for doing that with open/read/close, the
filesystem would be a much nicer interface.  But the sysfs rules prevent
that, so people who care avoid it.

We don't need to impose those same rules on nfsd-fs.

Having separate dirs for the clients makes some sense as the clients are
quite independent.  Sessions aren't - they are just part of the client. 
The *only* way session information is different from other client
information is that there is more structure - an array of sessions each
with detail.  I don't think that justifies a new directory.  It does
justify a carefully designed (or chosen) format for representing
structured data.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand.
  2024-11-20  0:32       ` Chuck Lever
@ 2024-11-21 21:20         ` NeilBrown
  0 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-11-21 21:20 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Wed, Nov 20, 2024 at 09:27:51AM +1100, NeilBrown wrote:
> > On Wed, 20 Nov 2024, Chuck Lever wrote:
> > > On Tue, Nov 19, 2024 at 11:41:31AM +1100, NeilBrown wrote:
> > > > If a client ever uses the highest available slot for a given session,
> > > > attempt to allocate another slot so there is room for the client to use
> > > > more slots if wanted.  GFP_NOWAIT is used so if there is not plenty of
> > > > free memory, failure is expected - which is what we want.  It also
> > > > allows the allocation while holding a spinlock.
> > > > 
> > > > We would expect to stablise with one more slot available than the client
> > > > actually uses.
> > > 
> > > Which begs the question "why have a 2048 slot maximum session slot
> > > table size?" 1025 might work too. But is there a need for any
> > > maximum at all, or is this just a sanity check?
> > 
> > Linux NFS presumably isn't the only client, and it might change in the
> > future.  Maybe there is no need for a maximum.  It was mostly as a
> > sanity check.
> > 
> > It wouldn't take much to convince me to remove the limit.
> 
> What's the worse that might happen if there is no cap? Can this be
> used as a DoS vector?

It depends on how much you trust the clients that you have decided to
trust.  Probably we want the option of a "public" NFS server (read only
probably) so we cannot assume much trust in the implementation of the
client.

Certainly a client could only ever use the highest slot number available
- though the RFC prefers lowest - and that could push allocating through
the roof.  We could defend against that in more subtle ways, but a hard
upper limit is easy.

> 
> If a maximum should be necessary, its value should be clearly
> labeled as "not an architectural limit -- for sanity checking only".

That is certainly sensible.

> 
> 
> > > > Now that we grow the slot table on demand we can start with a smaller
> > > > allocation.  Define NFSD_MAX_INITIAL_SLOTS and allocate at most that
> > > > many when session is created.
> > > 
> > > Maybe NFSD_DEFAULT_INITIAL_SLOTS is more descriptive?
> > 
> > I don't think "DEFAULT" is the right word.  The client requests a number
> > of slots.  That is the "Default".  The server can impose a limit - a
> > maximum.
> > Maybe we don't need a limit here either?
> 
> I see. Well I don't think there needs to be a "maximum" number of
> initial slots. NFSD can try to allocate the number the client
> requested as best it can, until it hits our sane maximum above.

Given that we have a shrinker to discard them if they ever become a
problem, that makes sense.

> 
> I think sessions should have a minimum number of slots to guarantee
> forward progress (or IOW prevent a deadlock). I would say that
> number should be larger than 1 -- perhaps 2 or even 4.

I think one is enough to ensure forward progress.  Otherwise the RFC
would have something to say about this.

> 
> The problem with a small initial slot count is that means the
> session has a slow start heuristic. That might or might not be
> desirable here.

The question of how quickly to increase slot count can be relevant at
any time, not just at session creation time.  If there is a burst of
activity after a quiet time during which the shrinker discarded a lot of
slots - how quickly should we rebuild?
My current approach is effectively one new slot per request round-trip.
So there might be 1 request in flight.  Then 2.  Then 3. etc.

We could aim for exponential rather than linear growth.  Maybe when the
highest slot is used, add 20% of the current number of slots - rounded
up.
So 1,2,3,4,5,6,8,10,12,15,18,22,26,31,37,44,52,62,74,88,105,126,

??

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info
  2024-11-21 21:03         ` NeilBrown
@ 2024-11-21 21:24           ` Chuck Lever III
  0 siblings, 0 replies; 47+ messages in thread
From: Chuck Lever III @ 2024-11-21 21:24 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Nov 21, 2024, at 4:03 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Wed, 20 Nov 2024, Chuck Lever wrote:
>> On Wed, Nov 20, 2024 at 09:24:52AM +1100, NeilBrown wrote:
>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>> On Tue, Nov 19, 2024 at 11:41:30AM +1100, NeilBrown wrote:
>>>>> Each client now reports the number of slots allocated in each session.
>>>>> 
>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>> ---
>>>>> fs/nfsd/nfs4state.c | 8 ++++++++
>>>>> 1 file changed, 8 insertions(+)
>>>>> 
>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>> index 3889ba1c653f..31ff9f92a895 100644
>>>>> --- a/fs/nfsd/nfs4state.c
>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>> @@ -2642,6 +2642,7 @@ static const char *cb_state2str(int state)
>>>>> static int client_info_show(struct seq_file *m, void *v)
>>>>> {
>>>>> struct inode *inode = file_inode(m->file);
>>>>> + struct nfsd4_session *ses;
>>>>> struct nfs4_client *clp;
>>>>> u64 clid;
>>>>> 
>>>>> @@ -2678,6 +2679,13 @@ static int client_info_show(struct seq_file *m, void *v)
>>>>> seq_printf(m, "callback address: \"%pISpc\"\n", &clp->cl_cb_conn.cb_addr);
>>>>> seq_printf(m, "admin-revoked states: %d\n",
>>>>>    atomic_read(&clp->cl_admin_revoked));
>>>>> + seq_printf(m, "session slots:");
>>>>> + spin_lock(&clp->cl_lock);
>>>>> + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>>>> + seq_printf(m, " %u", ses->se_fchannel.maxreqs);
>>>>> + spin_unlock(&clp->cl_lock);
>>>>> + seq_puts(m, "\n");
>>>>> +
>>>> 
>>>> Also, I wonder if information about the backchannel session can be
>>>> surfaced in this way?
>>>> 
>>> 
>>> Probably make sense.  Maybe we should invent a syntax for reporting
>>> arbitrary info about each session.
>>> 
>>>   session %d slots: %d
>>>   session %d cb-slots: %d
>>>   ...
>>> 
>>> ???
>> 
>> If each client has a directory, then it should have a subdirectory
>> called "sessions". Each subdirectory of "sessions" should be one
>> session, named by its hex session ID (as it is presented by
>> Wireshark). Each session directory could have a file for the forward
>> channel, one for the backchannel, and maybe one for generic
>> information like when the session was created and how many
>> connections it has.
>> 
>> We don't need all of that in this patch set, but whatever is
>> introduced here should be extensible to allow us to add more over
>> time.
> 
> I cannot say I'm excited about the proliferation of tiny files.  Your
> suggestion isn't quite as bad as sysfs which claims to want one file per
> value, but I think the sysfs approach provided more pain than gain and
> you seem to be heading that way.  As evidence I present the rise of
> netlink.  Netlink's main advantage is that it allows you to access a
> collection of data in a single syscall (or maybe pair of syscalls).  If
> we had a standard format for doing that with open/read/close, the
> filesystem would be a much nicer interface.  But the sysfs rules prevent
> that, so people who care avoid it.

I don't see this set of information as being in a
performance path. Needing multiple open/read/close
iterations doesn't seem like an impediment to me.

The only possible issue is that user space might
want a snapshot of certain related values, and
having to get the values from multiple files means
there's no guarantee that the values are consistent
with each other.


> We don't need to impose those same rules on nfsd-fs.
> 
> Having separate dirs for the clients makes some sense as the clients are
> quite independent.  Sessions aren't - they are just part of the client. 
> The *only* way session information is different from other client
> information is that there is more structure - an array of sessions each
> with detail.  I don't think that justifies a new directory.

Hrm. IMHO a directory is exactly suited to this kind of
information hierarchy. I can't say that I understand your
view; perhaps you feel this way because the client
implementations we are familiar with use only a single
session. For that, of course, a directory is overkill.


> It does
> justify a carefully designed (or chosen) format for representing
> structured data.

That usually means JSON or XML, which also have their haters.

However, I don't feel strongly about this. You asked me
for some thoughts, and here they are, at random.

My bottom line is reasonable extensibility -- the ability to
provide more information in these files in the future without
perturbing current consumers. IME that's been nearly impossible
with designs that have one file full of fields that need to be
parsed.

Should we expose session information via the new NFSD netlink
protocol instead? Or a sessions/ directory with one formatted
file per session? I'm open to discussion.


--
Chuck Lever



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-20  1:27       ` Chuck Lever
@ 2024-11-21 21:47         ` NeilBrown
  2024-11-21 22:29           ` Chuck Lever III
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-11-21 21:47 UTC (permalink / raw)
  To: Chuck Lever
  Cc: Jeff Layton, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Wed, 20 Nov 2024, Chuck Lever wrote:
> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> > On Wed, 20 Nov 2024, Chuck Lever wrote:
> > > On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> > > > Reducing the number of slots in the session slot table requires
> > > > confirmation from the client.  This patch adds reduce_session_slots()
> > > > which starts the process of getting confirmation, but never calls it.
> > > > That will come in a later patch.
> > > > 
> > > > Before we can free a slot we need to confirm that the client won't try
> > > > to use it again.  This involves returning a lower cr_maxrequests in a
> > > > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > > > is not larger than we limit we are trying to impose.  So for each slot
> > > > we need to remember that we have sent a reduced cr_maxrequests.
> > > > 
> > > > To achieve this we introduce a concept of request "generations".  Each
> > > > time we decide to reduce cr_maxrequests we increment the generation
> > > > number, and record this when we return the lower cr_maxrequests to the
> > > > client.  When a slot with the current generation reports a low
> > > > ca_maxrequests, we commit to that level and free extra slots.
> > > > 
> > > > We use an 8 bit generation number (64 seems wasteful) and if it cycles
> > > > we iterate all slots and reset the generation number to avoid false matches.
> > > > 
> > > > When we free a slot we store the seqid in the slot pointer so that it can
> > > > be restored when we reactivate the slot.  The RFC can be read as
> > > > suggesting that the slot number could restart from one after a slot is
> > > > retired and reactivated, but also suggests that retiring slots is not
> > > > required.  So when we reactive a slot we accept with the next seqid in
> > > > sequence, or 1.
> > > > 
> > > > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > > > matches how it is encoded for the reply.
> > > > 
> > > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > > ---
> > > >  fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> > > >  fs/nfsd/nfs4xdr.c   |  5 +--
> > > >  fs/nfsd/state.h     |  4 +++
> > > >  fs/nfsd/xdr4.h      |  2 --
> > > >  4 files changed, 76 insertions(+), 16 deletions(-)
> > > > 
> > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > > > index fb522165b376..0625b0aec6b8 100644
> > > > --- a/fs/nfsd/nfs4state.c
> > > > +++ b/fs/nfsd/nfs4state.c
> > > > @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> > > >  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> > > >  
> > > >  static void
> > > > -free_session_slots(struct nfsd4_session *ses)
> > > > +free_session_slots(struct nfsd4_session *ses, int from)
> > > >  {
> > > >  	int i;
> > > >  
> > > > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > > > +	if (from >= ses->se_fchannel.maxreqs)
> > > > +		return;
> > > > +
> > > > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> > > >  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > > >  
> > > > -		xa_erase(&ses->se_slots, i);
> > > > +		/*
> > > > +		 * Save the seqid in case we reactivate this slot.
> > > > +		 * This will never require a memory allocation so GFP
> > > > +		 * flag is irrelevant
> > > > +		 */
> > > > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> > > > +			 GFP_ATOMIC);
> > > 
> > > Again... ATOMIC is probably not what we want here, even if it is
> > > only documentary.
> > 
> > Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> > a warning.
> 
> I find using GFP_ATOMIC here to be confusing -- it requests
> allocation from special memory reserves and is to be used in
> situations where allocation might result in system failure. That is
> clearly not the case here, and the resulting memory allocation might
> be long-lived.

Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??

My understanding of how GFP_ATOMIC is used is it is what people choose
when they have to allocate in a no-sleep context.  It can fail and there
must always be a fall-back option.  In many cases GFP_NOWAIT could
possibly be used when it isn't a high priority, but there are 430 uses
for GFP_NOWAIT compared with over 5000 of GFP_ATOMIC.


> 
> I see the comment that says memory won't actually be allocated. I'm
> not sure that's the way xa_store() works, however.

xarray.rst says:

  The xa_store(), xa_cmpxchg(), xa_alloc(),
  xa_reserve() and xa_insert() functions take a gfp_t
  parameter in case the XArray needs to allocate memory to store this entry.
  If the entry is being deleted, no memory allocation needs to be performed,
  and the GFP flags specified will be ignored.

The particular context is that a normal pointer is currently stored at
the given index, and we are replacing that with a number.  The above
doesn't explicitly say that won't require a memory allocation, but I
think it is reasonable to say it won't need "to allocate memory to store
this entry" - as an entry is already stored - so allocation should not
be needed.

> 
> I don't immediately see another good choice, however. I can reach
> out to Matthew and Liam and see if they have a better idea.
> 
> 
> > > And, I thought we determined that an unretired slot had a sequence
> > > number that is reset. Why save the slot's seqid? If I'm missing
> > > something, the comment here should be bolstered to explain it.
> > 
> > It isn't clear to me that we determined that - only the some people
> > asserted it.
> 
> From what I've read, everyone else who responded has said "use one".
> And they have provided enough spec quotations that 1 seems like the
> right initial slot sequence number value, always.
> 
> You should trust Tom Talpey's opinion on this. He was directly
> involved 25 years ago when sessions were invented in DAFS and then
> transferred into the NFSv4.1 protocol.

Dave Noveck (also deeply involved) says:

   It does.  The problem is that it undercuts the core goal of the
   slot-based approach 
   In that it makes it possible to have multiple requests with the same
   session id/ slot ID / sequence triple.

i.e.  resetting to 1 undercuts the core goal....  That is not a
resounding endorsement.

While I respect the people, I prefer to trust reasoned arguments.

What exactly is the sequence number protecting against?  It must be
protecting against a request being sent, not reply received, connection
closed, request sent on another connection, reply received.  Original
request getting to the server before the "close" message.  Server must
be sure not to handle this request.  sequence number provides that.

But what if the above all happens for request with seqno of 1, then the
server and client negotiate a "retiring" of slot 1 and then its reuse
before the original request arrives.  How does the server know to ignore
it?

And Tom said that the handling of maxreq etc is "optional" for both
server and client.  So how can the server know if the client has retired
the slot when doing so is optional???

I really don't think we have clarity on this at all.

> 
> 
> > Until the spec is clarified I think it is safest to be cautious.
> 
> The usual line we draw for adding code/features/complexity is the
> proposer must demonstrate a use case for it. So far I have not seen
> a client implementation that needs a server to remember the sequence
> number in a slot that has been shrunken and then re-activated.

And I cannot in good faith submit code that I think violates the spec.

> 
> Will this dead slot be subject to being freed by the session
> shrinker?

No, but it uses much much less space than a slot.

> 
> But the proposed implementation accepts 1 in this case, and it
> doesn't seem tremendously difficult to remove the "remember the
> seqid" mechanism once it has been codified to everyone's
> satisfaction. So I won't belabor the point.

Thank you!

NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-21 21:47         ` NeilBrown
@ 2024-11-21 22:29           ` Chuck Lever III
  2024-12-02 16:11             ` Chuck Lever III
  0 siblings, 1 reply; 47+ messages in thread
From: Chuck Lever III @ 2024-11-21 22:29 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Wed, 20 Nov 2024, Chuck Lever wrote:
>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>> Reducing the number of slots in the session slot table requires
>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>> which starts the process of getting confirmation, but never calls it.
>>>>> That will come in a later patch.
>>>>> 
>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>> 
>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>> client.  When a slot with the current generation reports a low
>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>> 
>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>> 
>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>> suggesting that the slot number could restart from one after a slot is
>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>> sequence, or 1.
>>>>> 
>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>> matches how it is encoded for the reply.
>>>>> 
>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>> ---
>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>> fs/nfsd/state.h     |  4 +++
>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>> 
>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>> --- a/fs/nfsd/nfs4state.c
>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>> 
>>>>> static void
>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>> {
>>>>> int i;
>>>>> 
>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>> + return;
>>>>> +
>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>> 
>>>>> - xa_erase(&ses->se_slots, i);
>>>>> + /*
>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>> +  * This will never require a memory allocation so GFP
>>>>> +  * flag is irrelevant
>>>>> +  */
>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>> +  GFP_ATOMIC);
>>>> 
>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>> only documentary.
>>> 
>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>> a warning.
>> 
>> I find using GFP_ATOMIC here to be confusing -- it requests
>> allocation from special memory reserves and is to be used in
>> situations where allocation might result in system failure. That is
>> clearly not the case here, and the resulting memory allocation might
>> be long-lived.
> 
> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??

I will be comfortable when I hear back from Matthew and Liam.

:-)


>> I see the comment that says memory won't actually be allocated. I'm
>> not sure that's the way xa_store() works, however.
> 
> xarray.rst says:
> 
>  The xa_store(), xa_cmpxchg(), xa_alloc(),
>  xa_reserve() and xa_insert() functions take a gfp_t
>  parameter in case the XArray needs to allocate memory to store this entry.
>  If the entry is being deleted, no memory allocation needs to be performed,
>  and the GFP flags specified will be ignored.`
> 
> The particular context is that a normal pointer is currently stored a
> the given index, and we are replacing that with a number.  The above
> doesn't explicitly say that won't require a memory allocation, but I
> think it is reasonable to say it won't need "to allocate memory to store
> this entry" - as an entry is already stored - so allocation should not
> be needed.

xa_mk_value() converts a number to a pointer, and xa_store
stores that pointer.

I suspect that xa_store() is allowed to rebalance the
xarray's internal data structures, and that could result
in memory release or allocation. That's why a GFP flag is
one of the arguments.


--
Chuck Lever



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-11-21 22:29           ` Chuck Lever III
@ 2024-12-02 16:11             ` Chuck Lever III
  2024-12-03  4:28               ` NeilBrown
  0 siblings, 1 reply; 47+ messages in thread
From: Chuck Lever III @ 2024-12-02 16:11 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Nov 21, 2024, at 5:29 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> 
> 
> 
>> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
>> 
>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>>> Reducing the number of slots in the session slot table requires
>>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>>> which starts the process of getting confirmation, but never calls it.
>>>>>> That will come in a later patch.
>>>>>> 
>>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>>> 
>>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>>> client.  When a slot with the current generation reports a low
>>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>>> 
>>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>>> 
>>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>>> suggesting that the slot number could restart from one after a slot is
>>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>>> sequence, or 1.
>>>>>> 
>>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>>> matches how it is encoded for the reply.
>>>>>> 
>>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>>> ---
>>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>>> fs/nfsd/state.h     |  4 +++
>>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>>> 
>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>>> 
>>>>>> static void
>>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>>> {
>>>>>> int i;
>>>>>> 
>>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>>> + return;
>>>>>> +
>>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>>> 
>>>>>> - xa_erase(&ses->se_slots, i);
>>>>>> + /*
>>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>>> +  * This will never require a memory allocation so GFP
>>>>>> +  * flag is irrelevant
>>>>>> +  */
>>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>>> +  GFP_ATOMIC);
>>>>> 
>>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>>> only documentary.
>>>> 
>>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>>> a warning.
>>> 
>>> I find using GFP_ATOMIC here to be confusing -- it requests
>>> allocation from special memory reserves and is to be used in
>>> situations where allocation might result in system failure. That is
>>> clearly not the case here, and the resulting memory allocation might
>>> be long-lived.
>> 
>> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??
> 
> I will be comfortable when I hear back from Matthew and Liam.
> 
> :-)
> 
> 
>>> I see the comment that says memory won't actually be allocated. I'm
>>> not sure that's the way xa_store() works, however.
>> 
>> xarray.rst says:
>> 
>> The xa_store(), xa_cmpxchg(), xa_alloc(),
>> xa_reserve() and xa_insert() functions take a gfp_t
>> parameter in case the XArray needs to allocate memory to store this entry.
>> If the entry is being deleted, no memory allocation needs to be performed,
>> and the GFP flags specified will be ignored.`
>> 
>> The particular context is that a normal pointer is currently stored a
>> the given index, and we are replacing that with a number.  The above
>> doesn't explicitly say that won't require a memory allocation, but I
>> think it is reasonable to say it won't need "to allocate memory to store
>> this entry" - as an entry is already stored - so allocation should not
>> be needed.
> 
> xa_mk_value() converts a number to a pointer, and xa_store
> stores that pointer.
> 
> I suspect that xa_store() is allowed to rebalance the
> xarray's internal data structures, and that could result
> in memory release or allocation. That's why a GFP flag is
> one of the arguments.

Matthew says the xa_store() is guaranteed not to do a memory
allocation in this case. However, they prefer an annotation
of the call site with a "0" GFP argument to show that the
allocation flags are not relevant.

Does this:

	xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);

work for you?

--
Chuck Lever



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-02 16:11             ` Chuck Lever III
@ 2024-12-03  4:28               ` NeilBrown
  2024-12-03 14:40                 ` Chuck Lever III
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-12-03  4:28 UTC (permalink / raw)
  To: Chuck Lever III
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey

On Tue, 03 Dec 2024, Chuck Lever III wrote:
> 
> 
> > On Nov 21, 2024, at 5:29 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
> > 
> > 
> > 
> >> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
> >> 
> >> On Wed, 20 Nov 2024, Chuck Lever wrote:
> >>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
> >>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
> >>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
> >>>>>> Reducing the number of slots in the session slot table requires
> >>>>>> confirmation from the client.  This patch adds reduce_session_slots()
> >>>>>> which starts the process of getting confirmation, but never calls it.
> >>>>>> That will come in a later patch.
> >>>>>> 
> >>>>>> Before we can free a slot we need to confirm that the client won't try
> >>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
> >>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> >>>>>> is not larger than we limit we are trying to impose.  So for each slot
> >>>>>> we need to remember that we have sent a reduced cr_maxrequests.
> >>>>>> 
> >>>>>> To achieve this we introduce a concept of request "generations".  Each
> >>>>>> time we decide to reduce cr_maxrequests we increment the generation
> >>>>>> number, and record this when we return the lower cr_maxrequests to the
> >>>>>> client.  When a slot with the current generation reports a low
> >>>>>> ca_maxrequests, we commit to that level and free extra slots.
> >>>>>> 
> >>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> >>>>>> we iterate all slots and reset the generation number to avoid false matches.
> >>>>>> 
> >>>>>> When we free a slot we store the seqid in the slot pointer so that it can
> >>>>>> be restored when we reactivate the slot.  The RFC can be read as
> >>>>>> suggesting that the slot number could restart from one after a slot is
> >>>>>> retired and reactivated, but also suggests that retiring slots is not
> >>>>>> required.  So when we reactive a slot we accept with the next seqid in
> >>>>>> sequence, or 1.
> >>>>>> 
> >>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> >>>>>> matches how it is encoded for the reply.
> >>>>>> 
> >>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
> >>>>>> ---
> >>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
> >>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
> >>>>>> fs/nfsd/state.h     |  4 +++
> >>>>>> fs/nfsd/xdr4.h      |  2 --
> >>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
> >>>>>> 
> >>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >>>>>> index fb522165b376..0625b0aec6b8 100644
> >>>>>> --- a/fs/nfsd/nfs4state.c
> >>>>>> +++ b/fs/nfsd/nfs4state.c
> >>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
> >>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >>>>>> 
> >>>>>> static void
> >>>>>> -free_session_slots(struct nfsd4_session *ses)
> >>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
> >>>>>> {
> >>>>>> int i;
> >>>>>> 
> >>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> >>>>>> + if (from >= ses->se_fchannel.maxreqs)
> >>>>>> + return;
> >>>>>> +
> >>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >>>>>> 
> >>>>>> - xa_erase(&ses->se_slots, i);
> >>>>>> + /*
> >>>>>> +  * Save the seqid in case we reactivate this slot.
> >>>>>> +  * This will never require a memory allocation so GFP
> >>>>>> +  * flag is irrelevant
> >>>>>> +  */
> >>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
> >>>>>> +  GFP_ATOMIC);
> >>>>> 
> >>>>> Again... ATOMIC is probably not what we want here, even if it is
> >>>>> only documentary.
> >>>> 
> >>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
> >>>> a warning.
> >>> 
> >>> I find using GFP_ATOMIC here to be confusing -- it requests
> >>> allocation from special memory reserves and is to be used in
> >>> situations where allocation might result in system failure. That is
> >>> clearly not the case here, and the resulting memory allocation might
> >>> be long-lived.
> >> 
> >> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??
> > 
> > I will be comfortable when I hear back from Matthew and Liam.
> > 
> > :-)
> > 
> > 
> >>> I see the comment that says memory won't actually be allocated. I'm
> >>> not sure that's the way xa_store() works, however.
> >> 
> >> xarray.rst says:
> >> 
> >> The xa_store(), xa_cmpxchg(), xa_alloc(),
> >> xa_reserve() and xa_insert() functions take a gfp_t
> >> parameter in case the XArray needs to allocate memory to store this entry.
> >> If the entry is being deleted, no memory allocation needs to be performed,
> >> and the GFP flags specified will be ignored.`
> >> 
> >> The particular context is that a normal pointer is currently stored a
> >> the given index, and we are replacing that with a number.  The above
> >> doesn't explicitly say that won't require a memory allocation, but I
> >> think it is reasonable to say it won't need "to allocate memory to store
> >> this entry" - as an entry is already stored - so allocation should not
> >> be needed.
> > 
> > xa_mk_value() converts a number to a pointer, and xa_store
> > stores that pointer.
> > 
> > I suspect that xa_store() is allowed to rebalance the
> > xarray's internal data structures, and that could result
> > in memory release or allocation. That's why a GFP flag is
> > one of the arguments.
> 
> Matthew says the xa_store() is guaranteed not to do a memory
> allocation in this case. However, they prefer an annotation
> of the call site with a "0" GFP argument to show that the
> allocation flags are not relevant.
> 
> Does this:
> 
> 	xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> 
> work for you?

Sure.
And it looks like sparse will be happy even though "0" isn't explicitly
"gfp_t" because 0 is "special".

I might prefer GFP_NULL or similar, but 0 certainly works for me.  I'll
include that when I resend.

Thanks
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-03  4:28               ` NeilBrown
@ 2024-12-03 14:40                 ` Chuck Lever III
  0 siblings, 0 replies; 47+ messages in thread
From: Chuck Lever III @ 2024-12-03 14:40 UTC (permalink / raw)
  To: Neil Brown
  Cc: Jeff Layton, Linux NFS Mailing List, Olga Kornievskaia, Dai Ngo,
	Tom Talpey



> On Dec 2, 2024, at 11:28 PM, NeilBrown <neilb@suse.de> wrote:
> 
> On Tue, 03 Dec 2024, Chuck Lever III wrote:
>> 
>> 
>>> On Nov 21, 2024, at 5:29 PM, Chuck Lever III <chuck.lever@oracle.com> wrote:
>>> 
>>> 
>>> 
>>>> On Nov 21, 2024, at 4:47 PM, NeilBrown <neilb@suse.de> wrote:
>>>> 
>>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>>> On Wed, Nov 20, 2024 at 09:35:00AM +1100, NeilBrown wrote:
>>>>>> On Wed, 20 Nov 2024, Chuck Lever wrote:
>>>>>>> On Tue, Nov 19, 2024 at 11:41:32AM +1100, NeilBrown wrote:
>>>>>>>> Reducing the number of slots in the session slot table requires
>>>>>>>> confirmation from the client.  This patch adds reduce_session_slots()
>>>>>>>> which starts the process of getting confirmation, but never calls it.
>>>>>>>> That will come in a later patch.
>>>>>>>> 
>>>>>>>> Before we can free a slot we need to confirm that the client won't try
>>>>>>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>>>>>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>>>>>>> is not larger than we limit we are trying to impose.  So for each slot
>>>>>>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>>>>>> 
>>>>>>>> To achieve this we introduce a concept of request "generations".  Each
>>>>>>>> time we decide to reduce cr_maxrequests we increment the generation
>>>>>>>> number, and record this when we return the lower cr_maxrequests to the
>>>>>>>> client.  When a slot with the current generation reports a low
>>>>>>>> ca_maxrequests, we commit to that level and free extra slots.
>>>>>>>> 
>>>>>>>> We use an 8 bit generation number (64 seems wasteful) and if it cycles
>>>>>>>> we iterate all slots and reset the generation number to avoid false matches.
>>>>>>>> 
>>>>>>>> When we free a slot we store the seqid in the slot pointer so that it can
>>>>>>>> be restored when we reactivate the slot.  The RFC can be read as
>>>>>>>> suggesting that the slot number could restart from one after a slot is
>>>>>>>> retired and reactivated, but also suggests that retiring slots is not
>>>>>>>> required.  So when we reactive a slot we accept with the next seqid in
>>>>>>>> sequence, or 1.
>>>>>>>> 
>>>>>>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>>>>>>> matches how it is encoded for the reply.
>>>>>>>> 
>>>>>>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>>>>>>> ---
>>>>>>>> fs/nfsd/nfs4state.c | 81 ++++++++++++++++++++++++++++++++++++++-------
>>>>>>>> fs/nfsd/nfs4xdr.c   |  5 +--
>>>>>>>> fs/nfsd/state.h     |  4 +++
>>>>>>>> fs/nfsd/xdr4.h      |  2 --
>>>>>>>> 4 files changed, 76 insertions(+), 16 deletions(-)
>>>>>>>> 
>>>>>>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>>>>>>> index fb522165b376..0625b0aec6b8 100644
>>>>>>>> --- a/fs/nfsd/nfs4state.c
>>>>>>>> +++ b/fs/nfsd/nfs4state.c
>>>>>>>> @@ -1910,17 +1910,55 @@ gen_sessionid(struct nfsd4_session *ses)
>>>>>>>> #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>>>>>> 
>>>>>>>> static void
>>>>>>>> -free_session_slots(struct nfsd4_session *ses)
>>>>>>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>>>>>> {
>>>>>>>> int i;
>>>>>>>> 
>>>>>>>> - for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>>>>>>> + if (from >= ses->se_fchannel.maxreqs)
>>>>>>>> + return;
>>>>>>>> +
>>>>>>>> + for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>>>>>> struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>>>>>> 
>>>>>>>> - xa_erase(&ses->se_slots, i);
>>>>>>>> + /*
>>>>>>>> +  * Save the seqid in case we reactivate this slot.
>>>>>>>> +  * This will never require a memory allocation so GFP
>>>>>>>> +  * flag is irrelevant
>>>>>>>> +  */
>>>>>>>> + xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid),
>>>>>>>> +  GFP_ATOMIC);
>>>>>>> 
>>>>>>> Again... ATOMIC is probably not what we want here, even if it is
>>>>>>> only documentary.
>>>>>> 
>>>>>> Why not?  It might be called under a spinlock so GFP_KERNEL might trigger
>>>>>> a warning.
>>>>> 
>>>>> I find using GFP_ATOMIC here to be confusing -- it requests
>>>>> allocation from special memory reserves and is to be used in
>>>>> situations where allocation might result in system failure. That is
>>>>> clearly not the case here, and the resulting memory allocation might
>>>>> be long-lived.
>>>> 
>>>> Would you be comfortable with GFP_NOWAIT which leaves out __GFP_HIGH ??
>>> 
>>> I will be comfortable when I hear back from Matthew and Liam.
>>> 
>>> :-)
>>> 
>>> 
>>>>> I see the comment that says memory won't actually be allocated. I'm
>>>>> not sure that's the way xa_store() works, however.
>>>> 
>>>> xarray.rst says:
>>>> 
>>>> The xa_store(), xa_cmpxchg(), xa_alloc(),
>>>> xa_reserve() and xa_insert() functions take a gfp_t
>>>> parameter in case the XArray needs to allocate memory to store this entry.
>>>> If the entry is being deleted, no memory allocation needs to be performed,
>>>> and the GFP flags specified will be ignored.`
>>>> 
>>>> The particular context is that a normal pointer is currently stored a
>>>> the given index, and we are replacing that with a number.  The above
>>>> doesn't explicitly say that won't require a memory allocation, but I
>>>> think it is reasonable to say it won't need "to allocate memory to store
>>>> this entry" - as an entry is already stored - so allocation should not
>>>> be needed.
>>> 
>>> xa_mk_value() converts a number to a pointer, and xa_store
>>> stores that pointer.
>>> 
>>> I suspect that xa_store() is allowed to rebalance the
>>> xarray's internal data structures, and that could result
>>> in memory release or allocation. That's why a GFP flag is
>>> one of the arguments.
>> 
>> Matthew says the xa_store() is guaranteed not to do a memory
>> allocation in this case. However, they prefer an annotation
>> of the call site with a "0" GFP argument to show that the
>> allocation flags are not relevant.
>> 
>> Does this:
>> 
>> xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>> 
>> work for you?
> 
> Sure.
> And it looks like sparse will be happy even though "0" isn't explicitly
> "gfp_t" because 0 is "special".
> 
> I might prefer GFP_NULL or similar, but 0 certainly works for me.  I'll
> include that when I resend.

Matthew suggested GFP_NOALLOC. But neither of these symbolic
flags exist yet. I'd rather not hold up this series behind
the bikeshedding of the flag name ;-)


--
Chuck Lever



^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  0:43 [PATCH 0/6 v3] " NeilBrown
@ 2024-12-06  0:43 ` NeilBrown
  2024-12-06  5:30   ` Jeff Layton
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-12-06  0:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than the limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use an 8-bit generation number (64 bits seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the sequence ID could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactivate a slot we accept either the next seqid
in sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 80 +++++++++++++++++++++++++++++++++++++++------
 fs/nfsd/nfs4xdr.c   |  5 +--
 fs/nfsd/state.h     |  4 +++
 fs/nfsd/xdr4.h      |  2 --
 4 files changed, 77 insertions(+), 14 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ec4468ebbd40..e73668462739 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,54 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2722,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3715,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3727,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4279,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4304,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduce maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4320,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4341,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4307,17 +4359,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_ATOMIC | __GFP_NOWARN))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
+				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index aad547d3ad8b..74f2ab3c95aa 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -249,7 +249,9 @@ struct nfsd4_slot {
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
+	u8	sl_generation;
 	char	sl_data[];
 };
 
@@ -331,6 +333,8 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u8			se_slot_gen;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  0:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2024-12-06  5:30   ` Jeff Layton
  2024-12-06  6:05     ` NeilBrown
  0 siblings, 1 reply; 47+ messages in thread
From: Jeff Layton @ 2024-12-06  5:30 UTC (permalink / raw)
  To: NeilBrown, Chuck Lever; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than the limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 8 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactivate a slot we accept either the next seqid
> in sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 80 +++++++++++++++++++++++++++++++++++++++------
>  fs/nfsd/nfs4xdr.c   |  5 +--
>  fs/nfsd/state.h     |  4 +++
>  fs/nfsd/xdr4.h      |  2 --
>  4 files changed, 77 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index ec4468ebbd40..e73668462739 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,54 @@ gen_sessionid(struct nfsd4_session *ses)
>  #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>  
>  static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>  {
>  	int i;
>  
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>  		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>  
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>  		free_svc_cred(&slot->sl_cred);
>  		kfree(slot);
>  	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>  }
>  
>  /*
> @@ -1968,6 +2005,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	}
>  	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>  	new->se_cb_slot_avail = ~0U;
>  	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>  				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2081,7 +2119,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>  
>  static void __free_session(struct nfsd4_session *ses)
>  {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>  	xa_destroy(&ses->se_slots);
>  	kfree(ses);
>  }
> @@ -2684,6 +2722,9 @@ static int client_info_show(struct seq_file *m, void *v)
>  	seq_printf(m, "session slots:");
>  	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>  		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> +	seq_printf(m, "\nsession target slots:");
> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> +		seq_printf(m, " %u", ses->se_target_maxslots);
>  	spin_unlock(&clp->cl_lock);
>  	seq_puts(m, "\n");
>  
> @@ -3674,10 +3715,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>  	kfree(exid->server_impl_name);
>  }
>  
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>  {
>  	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>  		if (seqid == slot_seqid)
>  			return nfserr_jukebox;
>  		else
> @@ -3686,6 +3727,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>  	/* Note unsigned 32-bit arithmetic handles wraparound: */
>  	if (likely(seqid == slot_seqid + 1))
>  		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>  	if (seqid == slot_seqid)
>  		return nfserr_replay_cache;
>  	return nfserr_seq_misordered;
> @@ -4236,8 +4279,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>  
>  	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>  	if (status == nfserr_replay_cache) {
>  		status = nfserr_seq_misordered;
>  		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4262,6 +4304,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	if (status)
>  		goto out_put_session;
>  
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduced maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>  	buflen = (seq->cachethis) ?
>  			session->se_fchannel.maxresp_cached :
>  			session->se_fchannel.maxresp_sz;
> @@ -4272,9 +4320,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	svc_reserve(rqstp, buflen);
>  
>  	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>  	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>  	slot->sl_flags |= NFSD4_SLOT_INUSE;
> +	slot->sl_generation = session->se_slot_gen;
>  	if (seq->cachethis)
>  		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>  	else
> @@ -4291,9 +4341,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  	 * the client might use.
>  	 */
>  	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>  	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>  		int s = session->se_fchannel.maxreqs;
>  		int cnt = DIV_ROUND_UP(s, 5);
> +		void *prev_slot;
>  
>  		do {
>  			/*
> @@ -4307,17 +4359,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  			 */
>  			slot = kzalloc(slot_bytes(&session->se_fchannel),
>  				       GFP_NOWAIT);
> +			prev_slot = xa_load(&session->se_slots, s);
> +			if (xa_is_value(prev_slot) && slot) {
> +				slot->sl_seqid = xa_to_value(prev_slot);
> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> +			}
>  			if (slot &&
>  			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>  						GFP_ATOMIC | __GFP_NOWARN))) {
>  				s += 1;
>  				session->se_fchannel.maxreqs = s;
> +				session->se_target_maxslots = s;
>  			} else {
>  				kfree(slot);
> +				slot = NULL;
>  			}
>  		} while (slot && --cnt > 0);
>  	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>  
>  out:
>  	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 53fac037611c..4dcb03cd9292 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>  		return nfserr_bad_xdr;
>  	seq->seqid = be32_to_cpup(p++);
>  	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>  	seq->cachethis = be32_to_cpup(p);
>  
>  	seq->status_flags = 0;
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>  	if (nfserr != nfs_ok)
>  		return nfserr;
>  	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index aad547d3ad8b..74f2ab3c95aa 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -249,7 +249,9 @@ struct nfsd4_slot {
>  #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>  #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>  #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>  	u8	sl_flags;
> +	u8	sl_generation;
>  	char	sl_data[];
>  };
>  
> @@ -331,6 +333,8 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>  	struct xarray		se_slots;	/* forward channel slots */
> +	u8			se_slot_gen;
> +	u32			se_target_maxslots;
>  };
>  
>  /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>  	u32			slotid;			/* request/response */
>  	u32			maxslots;		/* request/response */
>  	u32			cachethis;		/* request */
> -#if 0
>  	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>  	u32			status_flags;		/* response */
>  };
>  


I don't see where the above "#if 0" gets removed in patch 6. Shouldn't
it be?

While it makes for a larger patch, I think we'd be better served by
squashing 5 and 6 together. It doesn't make sense to add this core
infrastructure without something to call it.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  5:30   ` Jeff Layton
@ 2024-12-06  6:05     ` NeilBrown
  2024-12-06 13:59       ` Jeff Layton
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-12-06  6:05 UTC (permalink / raw)
  To: Jeff Layton
  Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 06 Dec 2024, Jeff Layton wrote:
> On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:

> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 382cc1389396..c26ba86dbdfd 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >  	u32			slotid;			/* request/response */
> >  	u32			maxslots;		/* request/response */
> >  	u32			cachethis;		/* request */
> > -#if 0
> >  	u32			target_maxslots;	/* response */
> > -#endif /* not yet */
> >  	u32			status_flags;		/* response */
> >  };
> >  
> 
> 
> I don't see where the above "#if 0" gets removed in patch 6. Shouldn't
> it be?

You are misreading.  It is being removed here in patch 5. 
It was added in 2.6.38 in 
Commit b85d4c01b76f ("nfsd41: sequence operation")


> 
> While it makes for a larger patch, I think we'd be better served by
> squashing 5 and 6 together. It doesn't make sense to add this core
> infrastructure without something to call it.

I find it easier to review if the distinct elements of functionality are
kept separate.  But if both you and Chuck want just one patch here, I
can do that.

Thanks,
NeilBrown


> -- 
> Jeff Layton <jlayton@kernel.org>
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-06  6:05     ` NeilBrown
@ 2024-12-06 13:59       ` Jeff Layton
  0 siblings, 0 replies; 47+ messages in thread
From: Jeff Layton @ 2024-12-06 13:59 UTC (permalink / raw)
  To: NeilBrown; +Cc: Chuck Lever, linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

On Fri, 2024-12-06 at 17:05 +1100, NeilBrown wrote:
> On Fri, 06 Dec 2024, Jeff Layton wrote:
> > On Fri, 2024-12-06 at 11:43 +1100, NeilBrown wrote:
> 
> > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > > index 382cc1389396..c26ba86dbdfd 100644
> > > --- a/fs/nfsd/xdr4.h
> > > +++ b/fs/nfsd/xdr4.h
> > > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> > >  	u32			slotid;			/* request/response */
> > >  	u32			maxslots;		/* request/response */
> > >  	u32			cachethis;		/* request */
> > > -#if 0
> > >  	u32			target_maxslots;	/* response */
> > > -#endif /* not yet */
> > >  	u32			status_flags;		/* response */
> > >  };
> > >  
> > 
> > 
> > I don't see where the above "#if 0" gets removed in patch 6. Shouldn't
> > it be?
> 
> You are misreading.  It is being removed here in patch 5. 
> It was added in 2.6.38 in 
> Commit b85d4c01b76f ("nfsd41: sequence operation")
> 

Oh, sorry -- my mistake. That's what I get for reviewing patches just
before boarding a redeye flight!

> 
> > 
> > While it makes for a larger patch, I think we'd be better served by
> > squashing 5 and 6 together. It doesn't make sense to add this core
> > infrastructure without something to call it.
> 
> I find it easier to review if the distinct elements of functionality are
> kept separate.  But if both you and Chuck want just one patch here, I
> can do that.
> 

The proposed code is bisectable, so I don't feel too strongly about it.
Adding in unused functions is "Not The Way We (Usually) Do Things"
though.

I think in this case it was harder for me to review, since I had to
skip ahead to patch #6 to see how reduce_session_slots() would actually
be used. The spin_trylock(), in particular, was confusing until I
realized it was being called from a shrinker that iterated over all of
the clients and spinning there is probably not good.

Either way, a kerneldoc header over reduce_session_slots() that
explains this subtlety would be nice.
-- 
Jeff Layton <jlayton@kernel.org>

^ permalink raw reply	[flat|nested] 47+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-08 22:43 [PATCH 0/6 v4] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-12-08 22:43 ` NeilBrown
  0 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2024-12-08 22:43 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than the limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use a 16-bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactivate a slot we accept either the next seqid
in sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

se_dead is moved in struct nfsd4_session to remove a hole.

Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c   |  5 ++-
 fs/nfsd/state.h     |  6 ++-
 fs/nfsd/xdr4.h      |  2 -
 4 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fd9473d487f3..a2d1f97b8a0e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses:  The session to affect
+ * @dec:  how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock.  As shrinkers are
+ * best-effort, skipping a session if client_lock is already held has no
+ * great cost.
+ *
+ * Return value:
+ *   The number of slots that the target was reduced by.
+ */
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduced maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_NOWAIT))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
 				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index aad547d3ad8b..4251ff3c5ad1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -245,10 +245,12 @@ struct nfsd4_slot {
 	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
+	u16	sl_generation;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -321,7 +323,6 @@ struct nfsd4_session {
 	u32			se_cb_slot_avail; /* bitmap of available slots */
 	u32			se_cb_highest_slot;	/* highest slot client wants */
 	u32			se_cb_prog;
-	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
 	struct nfs4_client	*se_client;
@@ -331,6 +332,9 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u16			se_slot_gen;
+	bool			se_dead;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-11 21:47 [PATCH 0/6 v5] nfsd: allocate/free session-based DRC slots on demand NeilBrown
@ 2024-12-11 21:47 ` NeilBrown
  2025-01-19  2:01   ` Chuck Lever
  0 siblings, 1 reply; 47+ messages in thread
From: NeilBrown @ 2024-12-11 21:47 UTC (permalink / raw)
  To: Chuck Lever, Jeff Layton
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey

Reducing the number of slots in the session slot table requires
confirmation from the client.  This patch adds reduce_session_slots()
which starts the process of getting confirmation, but never calls it.
That will come in a later patch.

Before we can free a slot we need to confirm that the client won't try
to use it again.  This involves returning a lower cr_maxrequests in a
SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
is not larger than the limit we are trying to impose.  So for each slot
we need to remember that we have sent a reduced cr_maxrequests.

To achieve this we introduce a concept of request "generations".  Each
time we decide to reduce cr_maxrequests we increment the generation
number, and record this when we return the lower cr_maxrequests to the
client.  When a slot with the current generation reports a low
ca_maxrequests, we commit to that level and free extra slots.

We use a 16-bit generation number (64 seems wasteful) and if it cycles
we iterate all slots and reset the generation number to avoid false matches.

When we free a slot we store the seqid in the slot pointer so that it can
be restored when we reactivate the slot.  The RFC can be read as
suggesting that the slot number could restart from one after a slot is
retired and reactivated, but also suggests that retiring slots is not
required.  So when we reactivate a slot we accept either the next seqid
in sequence, or 1.

When decoding sa_highest_slotid into maxslots we need to add 1 - this
matches how it is encoded for the reply.

se_dead is moved in struct nfsd4_session to remove a hole.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
 fs/nfsd/nfs4xdr.c   |  5 ++-
 fs/nfsd/state.h     |  6 ++-
 fs/nfsd/xdr4.h      |  2 -
 4 files changed, 92 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fd9473d487f3..a2d1f97b8a0e 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
 #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
 
 static void
-free_session_slots(struct nfsd4_session *ses)
+free_session_slots(struct nfsd4_session *ses, int from)
 {
 	int i;
 
-	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+	if (from >= ses->se_fchannel.maxreqs)
+		return;
+
+	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
 		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
 
-		xa_erase(&ses->se_slots, i);
+		/*
+		 * Save the seqid in case we reactivate this slot.
+		 * This will never require a memory allocation so GFP
+		 * flag is irrelevant
+		 */
+		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
 		free_svc_cred(&slot->sl_cred);
 		kfree(slot);
 	}
+	ses->se_fchannel.maxreqs = from;
+	if (ses->se_target_maxslots > from)
+		ses->se_target_maxslots = from;
+}
+
+/**
+ * reduce_session_slots - reduce the target max-slots of a session if possible
+ * @ses:  The session to affect
+ * @dec:  how much to decrease the target by
+ *
+ * This interface can be used by a shrinker to reduce the target max-slots
+ * for a session so that some slots can eventually be freed.
+ * It uses spin_trylock() as it may be called in a context where another
+ * spinlock is held that has a dependency on client_lock.  As shrinkers are
+ * best-effort, skipping a session if client_lock is already held has no
+ * great cost.
+ *
+ * Return value:
+ *   The number of slots that the target was reduced by.
+ */
+static int __maybe_unused
+reduce_session_slots(struct nfsd4_session *ses, int dec)
+{
+	struct nfsd_net *nn = net_generic(ses->se_client->net,
+					  nfsd_net_id);
+	int ret = 0;
+
+	if (ses->se_target_maxslots <= 1)
+		return ret;
+	if (!spin_trylock(&nn->client_lock))
+		return ret;
+	ret = min(dec, ses->se_target_maxslots-1);
+	ses->se_target_maxslots -= ret;
+	ses->se_slot_gen += 1;
+	if (ses->se_slot_gen == 0) {
+		int i;
+		ses->se_slot_gen = 1;
+		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
+			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
+			slot->sl_generation = 0;
+		}
+	}
+	spin_unlock(&nn->client_lock);
+	return ret;
 }
 
 /*
@@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
 	}
 	fattrs->maxreqs = i;
 	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
+	new->se_target_maxslots = i;
 	new->se_cb_slot_avail = ~0U;
 	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
 				      NFSD_BC_SLOT_TABLE_SIZE - 1);
@@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-	free_session_slots(ses);
+	free_session_slots(ses, 0);
 	xa_destroy(&ses->se_slots);
 	kfree(ses);
 }
@@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
 	seq_printf(m, "session slots:");
 	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
 		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
+	seq_printf(m, "\nsession target slots:");
+	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
+		seq_printf(m, " %u", ses->se_target_maxslots);
 	spin_unlock(&clp->cl_lock);
 	seq_puts(m, "\n");
 
@@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
 	kfree(exid->server_impl_name);
 }
 
-static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
+static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
 {
 	/* The slot is in use, and no response has been sent. */
-	if (slot_inuse) {
+	if (flags & NFSD4_SLOT_INUSE) {
 		if (seqid == slot_seqid)
 			return nfserr_jukebox;
 		else
@@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
 	/* Note unsigned 32-bit arithmetic handles wraparound: */
 	if (likely(seqid == slot_seqid + 1))
 		return nfs_ok;
+	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
+		return nfs_ok;
 	if (seqid == slot_seqid)
 		return nfserr_replay_cache;
 	return nfserr_seq_misordered;
@@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	dprintk("%s: slotid %d\n", __func__, seq->slotid);
 
 	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
-	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
-					slot->sl_flags & NFSD4_SLOT_INUSE);
+	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
 	if (status == nfserr_replay_cache) {
 		status = nfserr_seq_misordered;
 		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
@@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	if (status)
 		goto out_put_session;
 
+	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
+	    slot->sl_generation == session->se_slot_gen &&
+	    seq->maxslots <= session->se_target_maxslots)
+		/* Client acknowledged our reduced maxreqs */
+		free_session_slots(session, session->se_target_maxslots);
+
 	buflen = (seq->cachethis) ?
 			session->se_fchannel.maxresp_cached :
 			session->se_fchannel.maxresp_sz;
@@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	svc_reserve(rqstp, buflen);
 
 	status = nfs_ok;
-	/* Success! bump slot seqid */
+	/* Success! accept new slot seqid */
 	slot->sl_seqid = seq->seqid;
+	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
 	slot->sl_flags |= NFSD4_SLOT_INUSE;
+	slot->sl_generation = session->se_slot_gen;
 	if (seq->cachethis)
 		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
 	else
@@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 	 * the client might use.
 	 */
 	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
+	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
 	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
 		int s = session->se_fchannel.maxreqs;
 		int cnt = DIV_ROUND_UP(s, 5);
+		void *prev_slot;
 
 		do {
 			/*
@@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			 */
 			slot = kzalloc(slot_bytes(&session->se_fchannel),
 				       GFP_NOWAIT);
+			prev_slot = xa_load(&session->se_slots, s);
+			if (xa_is_value(prev_slot) && slot) {
+				slot->sl_seqid = xa_to_value(prev_slot);
+				slot->sl_flags |= NFSD4_SLOT_REUSED;
+			}
 			if (slot &&
 			    !xa_is_err(xa_store(&session->se_slots, s, slot,
 						GFP_NOWAIT))) {
 				s += 1;
 				session->se_fchannel.maxreqs = s;
+				session->se_target_maxslots = s;
 			} else {
 				kfree(slot);
 				slot = NULL;
 			}
 		} while (slot && --cnt > 0);
 	}
-	seq->maxslots = session->se_fchannel.maxreqs;
+	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
+	seq->target_maxslots = session->se_target_maxslots;
 
 out:
 	switch (clp->cl_cb_state) {
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 53fac037611c..4dcb03cd9292 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
 		return nfserr_bad_xdr;
 	seq->seqid = be32_to_cpup(p++);
 	seq->slotid = be32_to_cpup(p++);
-	seq->maxslots = be32_to_cpup(p++);
+	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
+	seq->maxslots = be32_to_cpup(p++) + 1;
 	seq->cachethis = be32_to_cpup(p);
 
 	seq->status_flags = 0;
@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
 	if (nfserr != nfs_ok)
 		return nfserr;
 	/* sr_status_flags */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index aad547d3ad8b..4251ff3c5ad1 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -245,10 +245,12 @@ struct nfsd4_slot {
 	struct svc_cred sl_cred;
 	u32	sl_datalen;
 	u16	sl_opcnt;
+	u16	sl_generation;
 #define NFSD4_SLOT_INUSE	(1 << 0)
 #define NFSD4_SLOT_CACHETHIS	(1 << 1)
 #define NFSD4_SLOT_INITIALIZED	(1 << 2)
 #define NFSD4_SLOT_CACHED	(1 << 3)
+#define NFSD4_SLOT_REUSED	(1 << 4)
 	u8	sl_flags;
 	char	sl_data[];
 };
@@ -321,7 +323,6 @@ struct nfsd4_session {
 	u32			se_cb_slot_avail; /* bitmap of available slots */
 	u32			se_cb_highest_slot;	/* highest slot client wants */
 	u32			se_cb_prog;
-	bool			se_dead;
 	struct list_head	se_hash;	/* hash by sessionid */
 	struct list_head	se_perclnt;
 	struct nfs4_client	*se_client;
@@ -331,6 +332,9 @@ struct nfsd4_session {
 	struct list_head	se_conns;
 	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
 	struct xarray		se_slots;	/* forward channel slots */
+	u16			se_slot_gen;
+	bool			se_dead;
+	u32			se_target_maxslots;
 };
 
 /* formatted contents of nfs4_sessionid */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 382cc1389396..c26ba86dbdfd 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -576,9 +576,7 @@ struct nfsd4_sequence {
 	u32			slotid;			/* request/response */
 	u32			maxslots;		/* request/response */
 	u32			cachethis;		/* request */
-#if 0
 	u32			target_maxslots;	/* response */
-#endif /* not yet */
 	u32			status_flags;		/* response */
 };
 
-- 
2.47.0


^ permalink raw reply related	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2024-12-11 21:47 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
@ 2025-01-19  2:01   ` Chuck Lever
  2025-01-21  2:36     ` NeilBrown
  2025-01-27  4:08     ` NeilBrown
  0 siblings, 2 replies; 47+ messages in thread
From: Chuck Lever @ 2025-01-19  2:01 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 12/11/24 4:47 PM, NeilBrown wrote:
> Reducing the number of slots in the session slot table requires
> confirmation from the client.  This patch adds reduce_session_slots()
> which starts the process of getting confirmation, but never calls it.
> That will come in a later patch.
> 
> Before we can free a slot we need to confirm that the client won't try
> to use it again.  This involves returning a lower cr_maxrequests in a
> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> is not larger than we limit we are trying to impose.  So for each slot
> we need to remember that we have sent a reduced cr_maxrequests.
> 
> To achieve this we introduce a concept of request "generations".  Each
> time we decide to reduce cr_maxrequests we increment the generation
> number, and record this when we return the lower cr_maxrequests to the
> client.  When a slot with the current generation reports a low
> ca_maxrequests, we commit to that level and free extra slots.
> 
> We use an 16 bit generation number (64 seems wasteful) and if it cycles
> we iterate all slots and reset the generation number to avoid false matches.
> 
> When we free a slot we store the seqid in the slot pointer so that it can
> be restored when we reactivate the slot.  The RFC can be read as
> suggesting that the slot number could restart from one after a slot is
> retired and reactivated, but also suggests that retiring slots is not
> required.  So when we reactive a slot we accept with the next seqid in
> sequence, or 1.
> 
> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> matches how it is encoded for the reply.
> 
> se_dead is moved in struct nfsd4_session to remove a hole.
> 
> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>   fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
>   fs/nfsd/nfs4xdr.c   |  5 ++-
>   fs/nfsd/state.h     |  6 ++-
>   fs/nfsd/xdr4.h      |  2 -
>   4 files changed, 92 insertions(+), 15 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index fd9473d487f3..a2d1f97b8a0e 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
>   #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>   
>   static void
> -free_session_slots(struct nfsd4_session *ses)
> +free_session_slots(struct nfsd4_session *ses, int from)
>   {
>   	int i;
>   
> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +	if (from >= ses->se_fchannel.maxreqs)
> +		return;
> +
> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>   		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>   
> -		xa_erase(&ses->se_slots, i);
> +		/*
> +		 * Save the seqid in case we reactivate this slot.
> +		 * This will never require a memory allocation so GFP
> +		 * flag is irrelevant
> +		 */
> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>   		free_svc_cred(&slot->sl_cred);
>   		kfree(slot);
>   	}
> +	ses->se_fchannel.maxreqs = from;
> +	if (ses->se_target_maxslots > from)
> +		ses->se_target_maxslots = from;
> +}
> +
> +/**
> + * reduce_session_slots - reduce the target max-slots of a session if possible
> + * @ses:  The session to affect
> + * @dec:  how much to decrease the target by
> + *
> + * This interface can be used by a shrinker to reduce the target max-slots
> + * for a session so that some slots can eventually be freed.
> + * It uses spin_trylock() as it may be called in a context where another
> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> + * best-effort, skiping a session is client_lock is already held has no
> + * great coast
> + *
> + * Return value:
> + *   The number of slots that the target was reduced by.
> + */
> +static int __maybe_unused
> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> +{
> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> +					  nfsd_net_id);
> +	int ret = 0;
> +
> +	if (ses->se_target_maxslots <= 1)
> +		return ret;
> +	if (!spin_trylock(&nn->client_lock))
> +		return ret;
> +	ret = min(dec, ses->se_target_maxslots-1);
> +	ses->se_target_maxslots -= ret;
> +	ses->se_slot_gen += 1;
> +	if (ses->se_slot_gen == 0) {
> +		int i;
> +		ses->se_slot_gen = 1;
> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> +			slot->sl_generation = 0;
> +		}
> +	}
> +	spin_unlock(&nn->client_lock);
> +	return ret;
>   }
>   
>   /*
> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>   	}
>   	fattrs->maxreqs = i;
>   	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> +	new->se_target_maxslots = i;
>   	new->se_cb_slot_avail = ~0U;
>   	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>   				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>   
>   static void __free_session(struct nfsd4_session *ses)
>   {
> -	free_session_slots(ses);
> +	free_session_slots(ses, 0);
>   	xa_destroy(&ses->se_slots);
>   	kfree(ses);
>   }
> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
>   	seq_printf(m, "session slots:");
>   	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>   		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> +	seq_printf(m, "\nsession target slots:");
> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> +		seq_printf(m, " %u", ses->se_target_maxslots);
>   	spin_unlock(&clp->cl_lock);
>   	seq_puts(m, "\n");
>   
> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>   	kfree(exid->server_impl_name);
>   }
>   
> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>   {
>   	/* The slot is in use, and no response has been sent. */
> -	if (slot_inuse) {
> +	if (flags & NFSD4_SLOT_INUSE) {
>   		if (seqid == slot_seqid)
>   			return nfserr_jukebox;
>   		else
> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>   	/* Note unsigned 32-bit arithmetic handles wraparound: */
>   	if (likely(seqid == slot_seqid + 1))
>   		return nfs_ok;
> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> +		return nfs_ok;
>   	if (seqid == slot_seqid)
>   		return nfserr_replay_cache;
>   	return nfserr_seq_misordered;
> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>   
>   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>   	if (status == nfserr_replay_cache) {
>   		status = nfserr_seq_misordered;
>   		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	if (status)
>   		goto out_put_session;
>   
> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> +	    slot->sl_generation == session->se_slot_gen &&
> +	    seq->maxslots <= session->se_target_maxslots)
> +		/* Client acknowledged our reduce maxreqs */
> +		free_session_slots(session, session->se_target_maxslots);
> +
>   	buflen = (seq->cachethis) ?
>   			session->se_fchannel.maxresp_cached :
>   			session->se_fchannel.maxresp_sz;
> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	svc_reserve(rqstp, buflen);
>   
>   	status = nfs_ok;
> -	/* Success! bump slot seqid */
> +	/* Success! accept new slot seqid */
>   	slot->sl_seqid = seq->seqid;
> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>   	slot->sl_flags |= NFSD4_SLOT_INUSE;
> +	slot->sl_generation = session->se_slot_gen;
>   	if (seq->cachethis)
>   		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>   	else
> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   	 * the client might use.
>   	 */
>   	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>   	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>   		int s = session->se_fchannel.maxreqs;
>   		int cnt = DIV_ROUND_UP(s, 5);
> +		void *prev_slot;
>   
>   		do {
>   			/*
> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   			 */
>   			slot = kzalloc(slot_bytes(&session->se_fchannel),
>   				       GFP_NOWAIT);
> +			prev_slot = xa_load(&session->se_slots, s);
> +			if (xa_is_value(prev_slot) && slot) {
> +				slot->sl_seqid = xa_to_value(prev_slot);
> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> +			}
>   			if (slot &&
>   			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>   						GFP_NOWAIT))) {
>   				s += 1;
>   				session->se_fchannel.maxreqs = s;
> +				session->se_target_maxslots = s;
>   			} else {
>   				kfree(slot);
>   				slot = NULL;
>   			}
>   		} while (slot && --cnt > 0);
>   	}
> -	seq->maxslots = session->se_fchannel.maxreqs;
> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> +	seq->target_maxslots = session->se_target_maxslots;
>   
>   out:
>   	switch (clp->cl_cb_state) {
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 53fac037611c..4dcb03cd9292 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>   		return nfserr_bad_xdr;
>   	seq->seqid = be32_to_cpup(p++);
>   	seq->slotid = be32_to_cpup(p++);
> -	seq->maxslots = be32_to_cpup(p++);
> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> +	seq->maxslots = be32_to_cpup(p++) + 1;
>   	seq->cachethis = be32_to_cpup(p);
>   
>   	seq->status_flags = 0;
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_status_flags */
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index aad547d3ad8b..4251ff3c5ad1 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -245,10 +245,12 @@ struct nfsd4_slot {
>   	struct svc_cred sl_cred;
>   	u32	sl_datalen;
>   	u16	sl_opcnt;
> +	u16	sl_generation;
>   #define NFSD4_SLOT_INUSE	(1 << 0)
>   #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>   #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>   #define NFSD4_SLOT_CACHED	(1 << 3)
> +#define NFSD4_SLOT_REUSED	(1 << 4)
>   	u8	sl_flags;
>   	char	sl_data[];
>   };
> @@ -321,7 +323,6 @@ struct nfsd4_session {
>   	u32			se_cb_slot_avail; /* bitmap of available slots */
>   	u32			se_cb_highest_slot;	/* highest slot client wants */
>   	u32			se_cb_prog;
> -	bool			se_dead;
>   	struct list_head	se_hash;	/* hash by sessionid */
>   	struct list_head	se_perclnt;
>   	struct nfs4_client	*se_client;
> @@ -331,6 +332,9 @@ struct nfsd4_session {
>   	struct list_head	se_conns;
>   	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>   	struct xarray		se_slots;	/* forward channel slots */
> +	u16			se_slot_gen;
> +	bool			se_dead;
> +	u32			se_target_maxslots;
>   };
>   
>   /* formatted contents of nfs4_sessionid */
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index 382cc1389396..c26ba86dbdfd 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>   	u32			slotid;			/* request/response */
>   	u32			maxslots;		/* request/response */
>   	u32			cachethis;		/* request */
> -#if 0
>   	u32			target_maxslots;	/* response */
> -#endif /* not yet */
>   	u32			status_flags;		/* response */
>   };
>   

Hi Neil -

I've found some misbehavior which I've bisected to this commit.

If disconnect injection is set up to break the connection every 25,000
RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
after this commit is applied.

Network capture shows that the server eventually starts returning
SEQ_MISORDERED because the client has forgotten a retired slot after a
disconnect, and tries to use sequence number 1 for that slot with a new
operation.

I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
client. This function uses a bit of calculus to decide when to bump the
slot table's generation number. In the failing case, it never bumps the
generation, and that results in the client freeing slots it shouldn't.
The server's reported { highest, target_highest } slot numbers don't
appear to change correctly after the client has reconnected.

If I revert this hunk from 5/6:

@@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres 
*resp, __be32 nfserr,
  	if (nfserr != nfs_ok)
  		return nfserr;
  	/* sr_target_highest_slotid */
-	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
+	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
  	if (nfserr != nfs_ok)
  		return nfserr;
  	/* sr_status_flags */

the reproducer above runs to completion in the expected amount of time.


The high order bit here is whether I should drop these patches for
v6.14, or whether you believe you can come up with a narrow solution
during the early part of v6.14-rc that I can include in an -rc update
for NFSD. I can't really tell if a significant amount of surgery will
be necessary.

What do you think?


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-19  2:01   ` Chuck Lever
@ 2025-01-21  2:36     ` NeilBrown
  2025-01-21 16:24       ` Chuck Lever
  2025-01-27  4:08     ` NeilBrown
  1 sibling, 1 reply; 47+ messages in thread
From: NeilBrown @ 2025-01-21  2:36 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Sun, 19 Jan 2025, Chuck Lever wrote:
> On 12/11/24 4:47 PM, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than we limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use an 16 bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactive a slot we accept with the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > se_dead is moved in struct nfsd4_session to remove a hole.
> > 
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >   fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
> >   fs/nfsd/nfs4xdr.c   |  5 ++-
> >   fs/nfsd/state.h     |  6 ++-
> >   fs/nfsd/xdr4.h      |  2 -
> >   4 files changed, 92 insertions(+), 15 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fd9473d487f3..a2d1f97b8a0e 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
> >   #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >   
> >   static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >   {
> >   	int i;
> >   
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >   		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >   
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> >   		free_svc_cred(&slot->sl_cred);
> >   		kfree(slot);
> >   	}
> > +	ses->se_fchannel.maxreqs = from;
> > +	if (ses->se_target_maxslots > from)
> > +		ses->se_target_maxslots = from;
> > +}
> > +
> > +/**
> > + * reduce_session_slots - reduce the target max-slots of a session if possible
> > + * @ses:  The session to affect
> > + * @dec:  how much to decrease the target by
> > + *
> > + * This interface can be used by a shrinker to reduce the target max-slots
> > + * for a session so that some slots can eventually be freed.
> > + * It uses spin_trylock() as it may be called in a context where another
> > + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> > + * best-effort, skiping a session is client_lock is already held has no
> > + * great coast
> > + *
> > + * Return value:
> > + *   The number of slots that the target was reduced by.
> > + */
> > +static int __maybe_unused
> > +reduce_session_slots(struct nfsd4_session *ses, int dec)
> > +{
> > +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> > +					  nfsd_net_id);
> > +	int ret = 0;
> > +
> > +	if (ses->se_target_maxslots <= 1)
> > +		return ret;
> > +	if (!spin_trylock(&nn->client_lock))
> > +		return ret;
> > +	ret = min(dec, ses->se_target_maxslots-1);
> > +	ses->se_target_maxslots -= ret;
> > +	ses->se_slot_gen += 1;
> > +	if (ses->se_slot_gen == 0) {
> > +		int i;
> > +		ses->se_slot_gen = 1;
> > +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > +			slot->sl_generation = 0;
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	return ret;
> >   }
> >   
> >   /*
> > @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >   	}
> >   	fattrs->maxreqs = i;
> >   	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> > +	new->se_target_maxslots = i;
> >   	new->se_cb_slot_avail = ~0U;
> >   	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
> >   				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> > @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
> >   
> >   static void __free_session(struct nfsd4_session *ses)
> >   {
> > -	free_session_slots(ses);
> > +	free_session_slots(ses, 0);
> >   	xa_destroy(&ses->se_slots);
> >   	kfree(ses);
> >   }
> > @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
> >   	seq_printf(m, "session slots:");
> >   	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >   		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > +	seq_printf(m, "\nsession target slots:");
> > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > +		seq_printf(m, " %u", ses->se_target_maxslots);
> >   	spin_unlock(&clp->cl_lock);
> >   	seq_puts(m, "\n");
> >   
> > @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
> >   	kfree(exid->server_impl_name);
> >   }
> >   
> > -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> > +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
> >   {
> >   	/* The slot is in use, and no response has been sent. */
> > -	if (slot_inuse) {
> > +	if (flags & NFSD4_SLOT_INUSE) {
> >   		if (seqid == slot_seqid)
> >   			return nfserr_jukebox;
> >   		else
> > @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >   	/* Note unsigned 32-bit arithmetic handles wraparound: */
> >   	if (likely(seqid == slot_seqid + 1))
> >   		return nfs_ok;
> > +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> > +		return nfs_ok;
> >   	if (seqid == slot_seqid)
> >   		return nfserr_replay_cache;
> >   	return nfserr_seq_misordered;
> > @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >   
> >   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> > -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> > -					slot->sl_flags & NFSD4_SLOT_INUSE);
> > +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
> >   	if (status == nfserr_replay_cache) {
> >   		status = nfserr_seq_misordered;
> >   		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> > @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	if (status)
> >   		goto out_put_session;
> >   
> > +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> > +	    slot->sl_generation == session->se_slot_gen &&
> > +	    seq->maxslots <= session->se_target_maxslots)
> > +		/* Client acknowledged our reduce maxreqs */
> > +		free_session_slots(session, session->se_target_maxslots);
> > +
> >   	buflen = (seq->cachethis) ?
> >   			session->se_fchannel.maxresp_cached :
> >   			session->se_fchannel.maxresp_sz;
> > @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	svc_reserve(rqstp, buflen);
> >   
> >   	status = nfs_ok;
> > -	/* Success! bump slot seqid */
> > +	/* Success! accept new slot seqid */
> >   	slot->sl_seqid = seq->seqid;
> > +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
> >   	slot->sl_flags |= NFSD4_SLOT_INUSE;
> > +	slot->sl_generation = session->se_slot_gen;
> >   	if (seq->cachethis)
> >   		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> >   	else
> > @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	 * the client might use.
> >   	 */
> >   	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
> >   	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> >   		int s = session->se_fchannel.maxreqs;
> >   		int cnt = DIV_ROUND_UP(s, 5);
> > +		void *prev_slot;
> >   
> >   		do {
> >   			/*
> > @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   			 */
> >   			slot = kzalloc(slot_bytes(&session->se_fchannel),
> >   				       GFP_NOWAIT);
> > +			prev_slot = xa_load(&session->se_slots, s);
> > +			if (xa_is_value(prev_slot) && slot) {
> > +				slot->sl_seqid = xa_to_value(prev_slot);
> > +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> > +			}
> >   			if (slot &&
> >   			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> >   						GFP_NOWAIT))) {
> >   				s += 1;
> >   				session->se_fchannel.maxreqs = s;
> > +				session->se_target_maxslots = s;
> >   			} else {
> >   				kfree(slot);
> >   				slot = NULL;
> >   			}
> >   		} while (slot && --cnt > 0);
> >   	}
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> > +	seq->target_maxslots = session->se_target_maxslots;
> >   
> >   out:
> >   	switch (clp->cl_cb_state) {
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 53fac037611c..4dcb03cd9292 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> >   		return nfserr_bad_xdr;
> >   	seq->seqid = be32_to_cpup(p++);
> >   	seq->slotid = be32_to_cpup(p++);
> > -	seq->maxslots = be32_to_cpup(p++);
> > +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> > +	seq->maxslots = be32_to_cpup(p++) + 1;
> >   	seq->cachethis = be32_to_cpup(p);
> >   
> >   	seq->status_flags = 0;
> > @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_target_highest_slotid */
> > -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> > +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_status_flags */
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index aad547d3ad8b..4251ff3c5ad1 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -245,10 +245,12 @@ struct nfsd4_slot {
> >   	struct svc_cred sl_cred;
> >   	u32	sl_datalen;
> >   	u16	sl_opcnt;
> > +	u16	sl_generation;
> >   #define NFSD4_SLOT_INUSE	(1 << 0)
> >   #define NFSD4_SLOT_CACHETHIS	(1 << 1)
> >   #define NFSD4_SLOT_INITIALIZED	(1 << 2)
> >   #define NFSD4_SLOT_CACHED	(1 << 3)
> > +#define NFSD4_SLOT_REUSED	(1 << 4)
> >   	u8	sl_flags;
> >   	char	sl_data[];
> >   };
> > @@ -321,7 +323,6 @@ struct nfsd4_session {
> >   	u32			se_cb_slot_avail; /* bitmap of available slots */
> >   	u32			se_cb_highest_slot;	/* highest slot client wants */
> >   	u32			se_cb_prog;
> > -	bool			se_dead;
> >   	struct list_head	se_hash;	/* hash by sessionid */
> >   	struct list_head	se_perclnt;
> >   	struct nfs4_client	*se_client;
> > @@ -331,6 +332,9 @@ struct nfsd4_session {
> >   	struct list_head	se_conns;
> >   	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
> >   	struct xarray		se_slots;	/* forward channel slots */
> > +	u16			se_slot_gen;
> > +	bool			se_dead;
> > +	u32			se_target_maxslots;
> >   };
> >   
> >   /* formatted contents of nfs4_sessionid */
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 382cc1389396..c26ba86dbdfd 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >   	u32			slotid;			/* request/response */
> >   	u32			maxslots;		/* request/response */
> >   	u32			cachethis;		/* request */
> > -#if 0
> >   	u32			target_maxslots;	/* response */
> > -#endif /* not yet */
> >   	u32			status_flags;		/* response */
> >   };
> >   
> 
> Hi Neil -
> 
> I've found some misbehavior which I've bisected to this commit.
> 
> If disconnect injection is set up to break the connection every 25,000
> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
> after this commit is applied.
> 
> Network capture shows that the server eventually starts returning
> SEQ_MISORDERED because the client has forgotten a retired slot after a
> disconnect, and tries to use sequence number 1 for that slot with a new
> operation.
> 
> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
> client. This function uses a bit of calculus to decide when to bump the
> slot table's generation number. In the failing case, it never bumps the
> generation, and that results in the client freeing slots it shouldn't.
> The server's reported { highest, target_highest } slot numbers don't
> appear to change correctly after the client has reconnected.
> 
> If I revert this hunk from 5/6:
> 
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres 
> *resp, __be32 nfserr,
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_status_flags */
> 
> the reproducer above runs to completion in the expected amount of time.
> 
> 
> The high order bit here is whether I should drop these patches for
> v6.14, or whether you believe you can come up with a narrow solution
> during the early part of v6.14-rc that I can include in an -rc update
> for NFSD. I can't really tell if a significant amount of surgery will
> be necessary.
> 
> What do you think?

I think I can fix it.

It sounds like it might be a client bug, or it might be a difference in
interpretation of the spec, or it might be an ambiguity in the spec.

But I think I can fix it by setting NFSD4_SLOT_REUSED in any slots
beyond ->target_maxslots when I reduce target_maxslots.  That makes it
so that we accept either 1 or the next-in-sequence seqid.
Doing that does open up a possible risk of a resend being accepted as a
new request.  As it is a new connection, resends are a real possibility.
I'll have to ponder that a bit more and might need to be careful about
retiring slots with a low seqid which have been used recently.  Maybe I
need to store the time when seqid 1 was used, and not return a slot
until some minimum time after that timestamp.

I'll try to get you a patch before the end of next week.

Thanks,
NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-21  2:36     ` NeilBrown
@ 2025-01-21 16:24       ` Chuck Lever
  0 siblings, 0 replies; 47+ messages in thread
From: Chuck Lever @ 2025-01-21 16:24 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 1/20/25 9:36 PM, NeilBrown wrote:
> On Sun, 19 Jan 2025, Chuck Lever wrote:
>> On 12/11/24 4:47 PM, NeilBrown wrote:
>>> Reducing the number of slots in the session slot table requires
>>> confirmation from the client.  This patch adds reduce_session_slots()
>>> which starts the process of getting confirmation, but never calls it.
>>> That will come in a later patch.
>>>
>>> Before we can free a slot we need to confirm that the client won't try
>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>> is not larger than the limit we are trying to impose.  So for each slot
>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>
>>> To achieve this we introduce a concept of request "generations".  Each
>>> time we decide to reduce cr_maxrequests we increment the generation
>>> number, and record this when we return the lower cr_maxrequests to the
>>> client.  When a slot with the current generation reports a low
>>> ca_maxrequests, we commit to that level and free extra slots.
>>>
>>> We use a 16-bit generation number (64 seems wasteful) and if it cycles
>>> we iterate all slots and reset the generation number to avoid false matches.
>>>
>>> When we free a slot we store the seqid in the slot pointer so that it can
>>> be restored when we reactivate the slot.  The RFC can be read as
>>> suggesting that the slot number could restart from one after a slot is
>>> retired and reactivated, but also suggests that retiring slots is not
>>> required.  So when we reactivate a slot we accept either the next seqid in
>>> sequence, or 1.
>>>
>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>> matches how it is encoded for the reply.
>>>
>>> se_dead is moved in struct nfsd4_session to remove a hole.
>>>
>>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>> ---
>>>    fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
>>>    fs/nfsd/nfs4xdr.c   |  5 ++-
>>>    fs/nfsd/state.h     |  6 ++-
>>>    fs/nfsd/xdr4.h      |  2 -
>>>    4 files changed, 92 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index fd9473d487f3..a2d1f97b8a0e 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
>>>    #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>    
>>>    static void
>>> -free_session_slots(struct nfsd4_session *ses)
>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>    {
>>>    	int i;
>>>    
>>> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +	if (from >= ses->se_fchannel.maxreqs)
>>> +		return;
>>> +
>>> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>    		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>    
>>> -		xa_erase(&ses->se_slots, i);
>>> +		/*
>>> +		 * Save the seqid in case we reactivate this slot.
>>> +		 * This will never require a memory allocation so GFP
>>> +		 * flag is irrelevant
>>> +		 */
>>> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>>>    		free_svc_cred(&slot->sl_cred);
>>>    		kfree(slot);
>>>    	}
>>> +	ses->se_fchannel.maxreqs = from;
>>> +	if (ses->se_target_maxslots > from)
>>> +		ses->se_target_maxslots = from;
>>> +}
>>> +
>>> +/**
>>> + * reduce_session_slots - reduce the target max-slots of a session if possible
>>> + * @ses:  The session to affect
>>> + * @dec:  how much to decrease the target by
>>> + *
>>> + * This interface can be used by a shrinker to reduce the target max-slots
>>> + * for a session so that some slots can eventually be freed.
>>> + * It uses spin_trylock() as it may be called in a context where another
>>> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
>>> + * best-effort, skipping a session if client_lock is already held has no
>>> + * great cost.
>>> + *
>>> + * Return value:
>>> + *   The number of slots that the target was reduced by.
>>> + */
>>> +static int __maybe_unused
>>> +reduce_session_slots(struct nfsd4_session *ses, int dec)
>>> +{
>>> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
>>> +					  nfsd_net_id);
>>> +	int ret = 0;
>>> +
>>> +	if (ses->se_target_maxslots <= 1)
>>> +		return ret;
>>> +	if (!spin_trylock(&nn->client_lock))
>>> +		return ret;
>>> +	ret = min(dec, ses->se_target_maxslots-1);
>>> +	ses->se_target_maxslots -= ret;
>>> +	ses->se_slot_gen += 1;
>>> +	if (ses->se_slot_gen == 0) {
>>> +		int i;
>>> +		ses->se_slot_gen = 1;
>>> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>> +			slot->sl_generation = 0;
>>> +		}
>>> +	}
>>> +	spin_unlock(&nn->client_lock);
>>> +	return ret;
>>>    }
>>>    
>>>    /*
>>> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>>>    	}
>>>    	fattrs->maxreqs = i;
>>>    	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>>> +	new->se_target_maxslots = i;
>>>    	new->se_cb_slot_avail = ~0U;
>>>    	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>>>    				      NFSD_BC_SLOT_TABLE_SIZE - 1);
>>> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>>>    
>>>    static void __free_session(struct nfsd4_session *ses)
>>>    {
>>> -	free_session_slots(ses);
>>> +	free_session_slots(ses, 0);
>>>    	xa_destroy(&ses->se_slots);
>>>    	kfree(ses);
>>>    }
>>> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
>>>    	seq_printf(m, "session slots:");
>>>    	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>>    		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
>>> +	seq_printf(m, "\nsession target slots:");
>>> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>> +		seq_printf(m, " %u", ses->se_target_maxslots);
>>>    	spin_unlock(&clp->cl_lock);
>>>    	seq_puts(m, "\n");
>>>    
>>> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>>>    	kfree(exid->server_impl_name);
>>>    }
>>>    
>>> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>>>    {
>>>    	/* The slot is in use, and no response has been sent. */
>>> -	if (slot_inuse) {
>>> +	if (flags & NFSD4_SLOT_INUSE) {
>>>    		if (seqid == slot_seqid)
>>>    			return nfserr_jukebox;
>>>    		else
>>> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>>    	/* Note unsigned 32-bit arithmetic handles wraparound: */
>>>    	if (likely(seqid == slot_seqid + 1))
>>>    		return nfs_ok;
>>> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
>>> +		return nfs_ok;
>>>    	if (seqid == slot_seqid)
>>>    		return nfserr_replay_cache;
>>>    	return nfserr_seq_misordered;
>>> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>>>    
>>>    	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>>> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>>> -					slot->sl_flags & NFSD4_SLOT_INUSE);
>>> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>>>    	if (status == nfserr_replay_cache) {
>>>    		status = nfserr_seq_misordered;
>>>    		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
>>> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	if (status)
>>>    		goto out_put_session;
>>>    
>>> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
>>> +	    slot->sl_generation == session->se_slot_gen &&
>>> +	    seq->maxslots <= session->se_target_maxslots)
>>> +		/* Client acknowledged our reduce maxreqs */
>>> +		free_session_slots(session, session->se_target_maxslots);
>>> +
>>>    	buflen = (seq->cachethis) ?
>>>    			session->se_fchannel.maxresp_cached :
>>>    			session->se_fchannel.maxresp_sz;
>>> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	svc_reserve(rqstp, buflen);
>>>    
>>>    	status = nfs_ok;
>>> -	/* Success! bump slot seqid */
>>> +	/* Success! accept new slot seqid */
>>>    	slot->sl_seqid = seq->seqid;
>>> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>>>    	slot->sl_flags |= NFSD4_SLOT_INUSE;
>>> +	slot->sl_generation = session->se_slot_gen;
>>>    	if (seq->cachethis)
>>>    		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>>>    	else
>>> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	 * the client might use.
>>>    	 */
>>>    	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
>>> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>>>    	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>>>    		int s = session->se_fchannel.maxreqs;
>>>    		int cnt = DIV_ROUND_UP(s, 5);
>>> +		void *prev_slot;
>>>    
>>>    		do {
>>>    			/*
>>> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    			 */
>>>    			slot = kzalloc(slot_bytes(&session->se_fchannel),
>>>    				       GFP_NOWAIT);
>>> +			prev_slot = xa_load(&session->se_slots, s);
>>> +			if (xa_is_value(prev_slot) && slot) {
>>> +				slot->sl_seqid = xa_to_value(prev_slot);
>>> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
>>> +			}
>>>    			if (slot &&
>>>    			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>>>    						GFP_NOWAIT))) {
>>>    				s += 1;
>>>    				session->se_fchannel.maxreqs = s;
>>> +				session->se_target_maxslots = s;
>>>    			} else {
>>>    				kfree(slot);
>>>    				slot = NULL;
>>>    			}
>>>    		} while (slot && --cnt > 0);
>>>    	}
>>> -	seq->maxslots = session->se_fchannel.maxreqs;
>>> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
>>> +	seq->target_maxslots = session->se_target_maxslots;
>>>    
>>>    out:
>>>    	switch (clp->cl_cb_state) {
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 53fac037611c..4dcb03cd9292 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>>>    		return nfserr_bad_xdr;
>>>    	seq->seqid = be32_to_cpup(p++);
>>>    	seq->slotid = be32_to_cpup(p++);
>>> -	seq->maxslots = be32_to_cpup(p++);
>>> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
>>> +	seq->maxslots = be32_to_cpup(p++) + 1;
>>>    	seq->cachethis = be32_to_cpup(p);
>>>    
>>>    	seq->status_flags = 0;
>>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_target_highest_slotid */
>>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_status_flags */
>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>> index aad547d3ad8b..4251ff3c5ad1 100644
>>> --- a/fs/nfsd/state.h
>>> +++ b/fs/nfsd/state.h
>>> @@ -245,10 +245,12 @@ struct nfsd4_slot {
>>>    	struct svc_cred sl_cred;
>>>    	u32	sl_datalen;
>>>    	u16	sl_opcnt;
>>> +	u16	sl_generation;
>>>    #define NFSD4_SLOT_INUSE	(1 << 0)
>>>    #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>>>    #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>>>    #define NFSD4_SLOT_CACHED	(1 << 3)
>>> +#define NFSD4_SLOT_REUSED	(1 << 4)
>>>    	u8	sl_flags;
>>>    	char	sl_data[];
>>>    };
>>> @@ -321,7 +323,6 @@ struct nfsd4_session {
>>>    	u32			se_cb_slot_avail; /* bitmap of available slots */
>>>    	u32			se_cb_highest_slot;	/* highest slot client wants */
>>>    	u32			se_cb_prog;
>>> -	bool			se_dead;
>>>    	struct list_head	se_hash;	/* hash by sessionid */
>>>    	struct list_head	se_perclnt;
>>>    	struct nfs4_client	*se_client;
>>> @@ -331,6 +332,9 @@ struct nfsd4_session {
>>>    	struct list_head	se_conns;
>>>    	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>>>    	struct xarray		se_slots;	/* forward channel slots */
>>> +	u16			se_slot_gen;
>>> +	bool			se_dead;
>>> +	u32			se_target_maxslots;
>>>    };
>>>    
>>>    /* formatted contents of nfs4_sessionid */
>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>> index 382cc1389396..c26ba86dbdfd 100644
>>> --- a/fs/nfsd/xdr4.h
>>> +++ b/fs/nfsd/xdr4.h
>>> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>>>    	u32			slotid;			/* request/response */
>>>    	u32			maxslots;		/* request/response */
>>>    	u32			cachethis;		/* request */
>>> -#if 0
>>>    	u32			target_maxslots;	/* response */
>>> -#endif /* not yet */
>>>    	u32			status_flags;		/* response */
>>>    };
>>>    
>>
>> Hi Neil -
>>
>> I've found some misbehavior which I've bisected to this commit.
>>
>> If disconnect injection is set up to break the connection every 25,000
>> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
>> after this commit is applied.
>>
>> Network capture shows that the server eventually starts returning
>> SEQ_MISORDERED because the client has forgotten a retired slot after a
>> disconnect, and tries to use sequence number 1 for that slot with a new
>> operation.
>>
>> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
>> client. This function uses a bit of calculus to decide when to bump the
>> slot table's generation number. In the failing case, it never bumps the
>> generation, and that results in the client freeing slots it shouldn't.
>> The server's reported { highest, target_highest } slot numbers don't
>> appear to change correctly after the client has reconnected.
>>
>> If I revert this hunk from 5/6:
>>
>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres
>> *resp, __be32 nfserr,
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_target_highest_slotid */
>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_status_flags */
>>
>> the reproducer above runs to completion in the expected amount of time.
>>
>>
>> The high order bit here is whether I should drop these patches for
>> v6.14, or whether you believe you can come up with a narrow solution
>> during the early part of v6.14-rc that I can include in an -rc update
>> for NFSD. I can't really tell if a significant amount of surgery will
>> be necessary.
>>
>> What do you think?
> 
> I think I can fix it.
> 
> It sounds like it might be a client bug, or it might be a difference in
> interpretation of the spec, or it might be an ambiguity in the spec.

Perhaps, but consider that the Linux NFS client interoperates
successfully with v6.13 NFSD, Solaris's NFSv4.1 implementation,
NetApp's implementation, and Hammerspace, and has done for many
years.

I agree that the spec language isn't particularly transparent, but
somehow all of these implementations have interpreted it the same
way.


> But I think I can fix it by setting NFSD4_SLOT_REUSED in any slots
> beyond ->target_maxslots when I reduce target_maxslots.  That makes it
> so that we accept either 1 or the next-in-sequence seqid.
> Doing that does open up a possible risk of a resend being accepted as a
> new request.  As it is a new connection, resends are a real possibility.
> I'll have to ponder that a bit more and might need to be careful about
> retiring slots with a low seqid which have been used recently.  Maybe I
> need to store the time when seqid 1 was used, and not return a slot
> until some minimum time after that timestamp.

I gathered additional information using a trace_printk() in the client's
decode_sequence() function.

In the working case, after reconnect, the server SEQUENCE response has

   sr_highest_slotid = 18
   sr_target_highest_slotid = 18

IIUC 18 is the slot number of the highest slot currently in use.

In the stalling case, after reconnect, the server SEQUENCE response has

   sr_highest_slotid = 23
   sr_target_highest_slotid = 63

The client heuristics decide to free slots 23 and higher, even though
one or more of those slots have been active and have sequence numbers
larger than 1.

I can provide more detailed reproducer instructions if you wish.


> I'll try to get you a patch before the end of next week.

Thanks!


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-19  2:01   ` Chuck Lever
  2025-01-21  2:36     ` NeilBrown
@ 2025-01-27  4:08     ` NeilBrown
  2025-01-27 13:57       ` Chuck Lever
  1 sibling, 1 reply; 47+ messages in thread
From: NeilBrown @ 2025-01-27  4:08 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Sun, 19 Jan 2025, Chuck Lever wrote:
> On 12/11/24 4:47 PM, NeilBrown wrote:
> > Reducing the number of slots in the session slot table requires
> > confirmation from the client.  This patch adds reduce_session_slots()
> > which starts the process of getting confirmation, but never calls it.
> > That will come in a later patch.
> > 
> > Before we can free a slot we need to confirm that the client won't try
> > to use it again.  This involves returning a lower cr_maxrequests in a
> > SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> > is not larger than the limit we are trying to impose.  So for each slot
> > we need to remember that we have sent a reduced cr_maxrequests.
> > 
> > To achieve this we introduce a concept of request "generations".  Each
> > time we decide to reduce cr_maxrequests we increment the generation
> > number, and record this when we return the lower cr_maxrequests to the
> > client.  When a slot with the current generation reports a low
> > ca_maxrequests, we commit to that level and free extra slots.
> > 
> > We use a 16-bit generation number (64 seems wasteful) and if it cycles
> > we iterate all slots and reset the generation number to avoid false matches.
> > 
> > When we free a slot we store the seqid in the slot pointer so that it can
> > be restored when we reactivate the slot.  The RFC can be read as
> > suggesting that the slot number could restart from one after a slot is
> > retired and reactivated, but also suggests that retiring slots is not
> > required.  So when we reactivate a slot we accept either the next seqid in
> > sequence, or 1.
> > 
> > When decoding sa_highest_slotid into maxslots we need to add 1 - this
> > matches how it is encoded for the reply.
> > 
> > se_dead is moved in struct nfsd4_session to remove a hole.
> > 
> > Reviewed-by: Jeff Layton <jlayton@kernel.org>
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >   fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
> >   fs/nfsd/nfs4xdr.c   |  5 ++-
> >   fs/nfsd/state.h     |  6 ++-
> >   fs/nfsd/xdr4.h      |  2 -
> >   4 files changed, 92 insertions(+), 15 deletions(-)
> > 
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index fd9473d487f3..a2d1f97b8a0e 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
> >   #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >   
> >   static void
> > -free_session_slots(struct nfsd4_session *ses)
> > +free_session_slots(struct nfsd4_session *ses, int from)
> >   {
> >   	int i;
> >   
> > -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +	if (from >= ses->se_fchannel.maxreqs)
> > +		return;
> > +
> > +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >   		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >   
> > -		xa_erase(&ses->se_slots, i);
> > +		/*
> > +		 * Save the seqid in case we reactivate this slot.
> > +		 * This will never require a memory allocation so GFP
> > +		 * flag is irrelevant
> > +		 */
> > +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> >   		free_svc_cred(&slot->sl_cred);
> >   		kfree(slot);
> >   	}
> > +	ses->se_fchannel.maxreqs = from;
> > +	if (ses->se_target_maxslots > from)
> > +		ses->se_target_maxslots = from;
> > +}
> > +
> > +/**
> > + * reduce_session_slots - reduce the target max-slots of a session if possible
> > + * @ses:  The session to affect
> > + * @dec:  how much to decrease the target by
> > + *
> > + * This interface can be used by a shrinker to reduce the target max-slots
> > + * for a session so that some slots can eventually be freed.
> > + * It uses spin_trylock() as it may be called in a context where another
> > + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> > + * best-effort, skipping a session if client_lock is already held has no
> > + * great cost.
> > + *
> > + * Return value:
> > + *   The number of slots that the target was reduced by.
> > + */
> > +static int __maybe_unused
> > +reduce_session_slots(struct nfsd4_session *ses, int dec)
> > +{
> > +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> > +					  nfsd_net_id);
> > +	int ret = 0;
> > +
> > +	if (ses->se_target_maxslots <= 1)
> > +		return ret;
> > +	if (!spin_trylock(&nn->client_lock))
> > +		return ret;
> > +	ret = min(dec, ses->se_target_maxslots-1);
> > +	ses->se_target_maxslots -= ret;
> > +	ses->se_slot_gen += 1;
> > +	if (ses->se_slot_gen == 0) {
> > +		int i;
> > +		ses->se_slot_gen = 1;
> > +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> > +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> > +			slot->sl_generation = 0;
> > +		}
> > +	}
> > +	spin_unlock(&nn->client_lock);
> > +	return ret;
> >   }
> >   
> >   /*
> > @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >   	}
> >   	fattrs->maxreqs = i;
> >   	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> > +	new->se_target_maxslots = i;
> >   	new->se_cb_slot_avail = ~0U;
> >   	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
> >   				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> > @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
> >   
> >   static void __free_session(struct nfsd4_session *ses)
> >   {
> > -	free_session_slots(ses);
> > +	free_session_slots(ses, 0);
> >   	xa_destroy(&ses->se_slots);
> >   	kfree(ses);
> >   }
> > @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
> >   	seq_printf(m, "session slots:");
> >   	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >   		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> > +	seq_printf(m, "\nsession target slots:");
> > +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> > +		seq_printf(m, " %u", ses->se_target_maxslots);
> >   	spin_unlock(&clp->cl_lock);
> >   	seq_puts(m, "\n");
> >   
> > @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
> >   	kfree(exid->server_impl_name);
> >   }
> >   
> > -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> > +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
> >   {
> >   	/* The slot is in use, and no response has been sent. */
> > -	if (slot_inuse) {
> > +	if (flags & NFSD4_SLOT_INUSE) {
> >   		if (seqid == slot_seqid)
> >   			return nfserr_jukebox;
> >   		else
> > @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >   	/* Note unsigned 32-bit arithmetic handles wraparound: */
> >   	if (likely(seqid == slot_seqid + 1))
> >   		return nfs_ok;
> > +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> > +		return nfs_ok;
> >   	if (seqid == slot_seqid)
> >   		return nfserr_replay_cache;
> >   	return nfserr_seq_misordered;
> > @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >   
> >   	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> > -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> > -					slot->sl_flags & NFSD4_SLOT_INUSE);
> > +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
> >   	if (status == nfserr_replay_cache) {
> >   		status = nfserr_seq_misordered;
> >   		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> > @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	if (status)
> >   		goto out_put_session;
> >   
> > +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> > +	    slot->sl_generation == session->se_slot_gen &&
> > +	    seq->maxslots <= session->se_target_maxslots)
> > +		/* Client acknowledged our reduce maxreqs */
> > +		free_session_slots(session, session->se_target_maxslots);
> > +
> >   	buflen = (seq->cachethis) ?
> >   			session->se_fchannel.maxresp_cached :
> >   			session->se_fchannel.maxresp_sz;
> > @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	svc_reserve(rqstp, buflen);
> >   
> >   	status = nfs_ok;
> > -	/* Success! bump slot seqid */
> > +	/* Success! accept new slot seqid */
> >   	slot->sl_seqid = seq->seqid;
> > +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
> >   	slot->sl_flags |= NFSD4_SLOT_INUSE;
> > +	slot->sl_generation = session->se_slot_gen;
> >   	if (seq->cachethis)
> >   		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> >   	else
> > @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   	 * the client might use.
> >   	 */
> >   	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> > +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
> >   	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> >   		int s = session->se_fchannel.maxreqs;
> >   		int cnt = DIV_ROUND_UP(s, 5);
> > +		void *prev_slot;
> >   
> >   		do {
> >   			/*
> > @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >   			 */
> >   			slot = kzalloc(slot_bytes(&session->se_fchannel),
> >   				       GFP_NOWAIT);
> > +			prev_slot = xa_load(&session->se_slots, s);
> > +			if (xa_is_value(prev_slot) && slot) {
> > +				slot->sl_seqid = xa_to_value(prev_slot);
> > +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> > +			}
> >   			if (slot &&
> >   			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> >   						GFP_NOWAIT))) {
> >   				s += 1;
> >   				session->se_fchannel.maxreqs = s;
> > +				session->se_target_maxslots = s;
> >   			} else {
> >   				kfree(slot);
> >   				slot = NULL;
> >   			}
> >   		} while (slot && --cnt > 0);
> >   	}
> > -	seq->maxslots = session->se_fchannel.maxreqs;
> > +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> > +	seq->target_maxslots = session->se_target_maxslots;
> >   
> >   out:
> >   	switch (clp->cl_cb_state) {
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 53fac037611c..4dcb03cd9292 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> >   		return nfserr_bad_xdr;
> >   	seq->seqid = be32_to_cpup(p++);
> >   	seq->slotid = be32_to_cpup(p++);
> > -	seq->maxslots = be32_to_cpup(p++);
> > +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> > +	seq->maxslots = be32_to_cpup(p++) + 1;
> >   	seq->cachethis = be32_to_cpup(p);
> >   
> >   	seq->status_flags = 0;
> > @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_target_highest_slotid */
> > -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> > +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
> >   	if (nfserr != nfs_ok)
> >   		return nfserr;
> >   	/* sr_status_flags */
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index aad547d3ad8b..4251ff3c5ad1 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -245,10 +245,12 @@ struct nfsd4_slot {
> >   	struct svc_cred sl_cred;
> >   	u32	sl_datalen;
> >   	u16	sl_opcnt;
> > +	u16	sl_generation;
> >   #define NFSD4_SLOT_INUSE	(1 << 0)
> >   #define NFSD4_SLOT_CACHETHIS	(1 << 1)
> >   #define NFSD4_SLOT_INITIALIZED	(1 << 2)
> >   #define NFSD4_SLOT_CACHED	(1 << 3)
> > +#define NFSD4_SLOT_REUSED	(1 << 4)
> >   	u8	sl_flags;
> >   	char	sl_data[];
> >   };
> > @@ -321,7 +323,6 @@ struct nfsd4_session {
> >   	u32			se_cb_slot_avail; /* bitmap of available slots */
> >   	u32			se_cb_highest_slot;	/* highest slot client wants */
> >   	u32			se_cb_prog;
> > -	bool			se_dead;
> >   	struct list_head	se_hash;	/* hash by sessionid */
> >   	struct list_head	se_perclnt;
> >   	struct nfs4_client	*se_client;
> > @@ -331,6 +332,9 @@ struct nfsd4_session {
> >   	struct list_head	se_conns;
> >   	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
> >   	struct xarray		se_slots;	/* forward channel slots */
> > +	u16			se_slot_gen;
> > +	bool			se_dead;
> > +	u32			se_target_maxslots;
> >   };
> >   
> >   /* formatted contents of nfs4_sessionid */
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index 382cc1389396..c26ba86dbdfd 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >   	u32			slotid;			/* request/response */
> >   	u32			maxslots;		/* request/response */
> >   	u32			cachethis;		/* request */
> > -#if 0
> >   	u32			target_maxslots;	/* response */
> > -#endif /* not yet */
> >   	u32			status_flags;		/* response */
> >   };
> >   
> 
> Hi Neil -
> 
> I've found some misbehavior which I've bisected to this commit.

Hi Chuck,
 could you please confirm that it really was this commit that you
 bisected to?  Not the next one?
 Because this commit never reduces ->se_target_maxslots, so the
 patch which you say removed the symptom should be a no-op.

 Even if it was the next commit I'm struggling to pin down the
 problem.  Here is my current analysis - partly to ensure I can present
 it clearly.

 The evidence suggests that the client has retired a slot that the
 server hasn't.  This happens when nfs41_set_server_slotid_locked()
 calls nfsd4_shrink_slot_table(), and nothing will happen if any slots
 before the new limit are still in use.  If the server reduces
 its idea of the target when the client isn't even using that many,
 the slots can be freed immediately that the client gets a reply
 indicating the new highest_slot number from the server.

 The server will not free these slots immediately but will wait to get a
 confirmation from the client that it has accepted the new limit.  But,
 importantly, the server will not increase the limit that it sends to
 the client until after it has had a chance to free the retired slots.
 If the server doesn't increase the limit, then the client won't try to
 use the retired slots...

 Do you still have the network trace which shows the error?  Would I be
 able to look at it?

Thanks,
NeilBrown


 

 

> 
> If disconnect injection is set up to break the connection every 25,000
> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
> after this commit is applied.
> 
> Network capture shows that the server eventually starts returning
> SEQ_MISORDERED because the client has forgotten a retired slot after a
> disconnect, and tries to use sequence number 1 for that slot with a new
> operation.
> 
> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
> client. This function uses a bit of calculus to decide when to bump the
> slot table's generation number. In the failing case, it never bumps the
> generation, and that results in the client freeing slots it shouldn't.
> The server's reported { highest, target_highest } slot numbers don't
> appear to change correctly after the client has reconnected.
> 
> If I revert this hunk from 5/6:
> 
> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres 
> *resp, __be32 nfserr,
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_target_highest_slotid */
> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>   	if (nfserr != nfs_ok)
>   		return nfserr;
>   	/* sr_status_flags */
> 
> the reproducer above runs to completion in the expected amount of time.
> 
> 
> The high order bit here is whether I should drop these patches for
> v6.14, or whether you believe you can come up with a narrow solution
> during the early part of v6.14-rc that I can include in an -rc update
> for NFSD. I can't really tell if a significant amount of surgery will
> be necessary.
> 
> What do you think?
> 
> 
> -- 
> Chuck Lever
> 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-27  4:08     ` NeilBrown
@ 2025-01-27 13:57       ` Chuck Lever
  2025-01-27 22:57         ` NeilBrown
  0 siblings, 1 reply; 47+ messages in thread
From: Chuck Lever @ 2025-01-27 13:57 UTC (permalink / raw)
  To: NeilBrown; +Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On 1/26/25 11:08 PM, NeilBrown wrote:
> On Sun, 19 Jan 2025, Chuck Lever wrote:
>> On 12/11/24 4:47 PM, NeilBrown wrote:
>>> Reducing the number of slots in the session slot table requires
>>> confirmation from the client.  This patch adds reduce_session_slots()
>>> which starts the process of getting confirmation, but never calls it.
>>> That will come in a later patch.
>>>
>>> Before we can free a slot we need to confirm that the client won't try
>>> to use it again.  This involves returning a lower cr_maxrequests in a
>>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
>>> is not larger than the limit we are trying to impose.  So for each slot
>>> we need to remember that we have sent a reduced cr_maxrequests.
>>>
>>> To achieve this we introduce a concept of request "generations".  Each
>>> time we decide to reduce cr_maxrequests we increment the generation
>>> number, and record this when we return the lower cr_maxrequests to the
>>> client.  When a slot with the current generation reports a low
>>> ca_maxrequests, we commit to that level and free extra slots.
>>>
>>> We use a 16-bit generation number (64 seems wasteful) and if it cycles
>>> we iterate all slots and reset the generation number to avoid false matches.
>>>
>>> When we free a slot we store the seqid in the slot pointer so that it can
>>> be restored when we reactivate the slot.  The RFC can be read as
>>> suggesting that the slot number could restart from one after a slot is
>>> retired and reactivated, but also suggests that retiring slots is not
>>> required.  So when we reactivate a slot we accept either the next seqid in
>>> sequence, or 1.
>>>
>>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
>>> matches how it is encoded for the reply.
>>>
>>> se_dead is moved in struct nfsd4_session to remove a hole.
>>>
>>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
>>> Signed-off-by: NeilBrown <neilb@suse.de>
>>> ---
>>>    fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
>>>    fs/nfsd/nfs4xdr.c   |  5 ++-
>>>    fs/nfsd/state.h     |  6 ++-
>>>    fs/nfsd/xdr4.h      |  2 -
>>>    4 files changed, 92 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>>> index fd9473d487f3..a2d1f97b8a0e 100644
>>> --- a/fs/nfsd/nfs4state.c
>>> +++ b/fs/nfsd/nfs4state.c
>>> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
>>>    #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
>>>    
>>>    static void
>>> -free_session_slots(struct nfsd4_session *ses)
>>> +free_session_slots(struct nfsd4_session *ses, int from)
>>>    {
>>>    	int i;
>>>    
>>> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +	if (from >= ses->se_fchannel.maxreqs)
>>> +		return;
>>> +
>>> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
>>>    		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>>    
>>> -		xa_erase(&ses->se_slots, i);
>>> +		/*
>>> +		 * Save the seqid in case we reactivate this slot.
>>> +		 * This will never require a memory allocation so GFP
>>> +		 * flag is irrelevant
>>> +		 */
>>> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
>>>    		free_svc_cred(&slot->sl_cred);
>>>    		kfree(slot);
>>>    	}
>>> +	ses->se_fchannel.maxreqs = from;
>>> +	if (ses->se_target_maxslots > from)
>>> +		ses->se_target_maxslots = from;
>>> +}
>>> +
>>> +/**
>>> + * reduce_session_slots - reduce the target max-slots of a session if possible
>>> + * @ses:  The session to affect
>>> + * @dec:  how much to decrease the target by
>>> + *
>>> + * This interface can be used by a shrinker to reduce the target max-slots
>>> + * for a session so that some slots can eventually be freed.
>>> + * It uses spin_trylock() as it may be called in a context where another
>>> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
>>> + * best-effort, skiping a session is client_lock is already held has no
>>> + * great coast
>>> + *
>>> + * Return value:
>>> + *   The number of slots that the target was reduced by.
>>> + */
>>> +static int __maybe_unused
>>> +reduce_session_slots(struct nfsd4_session *ses, int dec)
>>> +{
>>> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
>>> +					  nfsd_net_id);
>>> +	int ret = 0;
>>> +
>>> +	if (ses->se_target_maxslots <= 1)
>>> +		return ret;
>>> +	if (!spin_trylock(&nn->client_lock))
>>> +		return ret;
>>> +	ret = min(dec, ses->se_target_maxslots-1);
>>> +	ses->se_target_maxslots -= ret;
>>> +	ses->se_slot_gen += 1;
>>> +	if (ses->se_slot_gen == 0) {
>>> +		int i;
>>> +		ses->se_slot_gen = 1;
>>> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
>>> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
>>> +			slot->sl_generation = 0;
>>> +		}
>>> +	}
>>> +	spin_unlock(&nn->client_lock);
>>> +	return ret;
>>>    }
>>>    
>>>    /*
>>> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>>>    	}
>>>    	fattrs->maxreqs = i;
>>>    	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>>> +	new->se_target_maxslots = i;
>>>    	new->se_cb_slot_avail = ~0U;
>>>    	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
>>>    				      NFSD_BC_SLOT_TABLE_SIZE - 1);
>>> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
>>>    
>>>    static void __free_session(struct nfsd4_session *ses)
>>>    {
>>> -	free_session_slots(ses);
>>> +	free_session_slots(ses, 0);
>>>    	xa_destroy(&ses->se_slots);
>>>    	kfree(ses);
>>>    }
>>> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
>>>    	seq_printf(m, "session slots:");
>>>    	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>>    		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
>>> +	seq_printf(m, "\nsession target slots:");
>>> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
>>> +		seq_printf(m, " %u", ses->se_target_maxslots);
>>>    	spin_unlock(&clp->cl_lock);
>>>    	seq_puts(m, "\n");
>>>    
>>> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
>>>    	kfree(exid->server_impl_name);
>>>    }
>>>    
>>> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
>>>    {
>>>    	/* The slot is in use, and no response has been sent. */
>>> -	if (slot_inuse) {
>>> +	if (flags & NFSD4_SLOT_INUSE) {
>>>    		if (seqid == slot_seqid)
>>>    			return nfserr_jukebox;
>>>    		else
>>> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
>>>    	/* Note unsigned 32-bit arithmetic handles wraparound: */
>>>    	if (likely(seqid == slot_seqid + 1))
>>>    		return nfs_ok;
>>> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
>>> +		return nfs_ok;
>>>    	if (seqid == slot_seqid)
>>>    		return nfserr_replay_cache;
>>>    	return nfserr_seq_misordered;
>>> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	dprintk("%s: slotid %d\n", __func__, seq->slotid);
>>>    
>>>    	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
>>> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
>>> -					slot->sl_flags & NFSD4_SLOT_INUSE);
>>> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
>>>    	if (status == nfserr_replay_cache) {
>>>    		status = nfserr_seq_misordered;
>>>    		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
>>> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	if (status)
>>>    		goto out_put_session;
>>>    
>>> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
>>> +	    slot->sl_generation == session->se_slot_gen &&
>>> +	    seq->maxslots <= session->se_target_maxslots)
>>> +		/* Client acknowledged our reduce maxreqs */
>>> +		free_session_slots(session, session->se_target_maxslots);
>>> +
>>>    	buflen = (seq->cachethis) ?
>>>    			session->se_fchannel.maxresp_cached :
>>>    			session->se_fchannel.maxresp_sz;
>>> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	svc_reserve(rqstp, buflen);
>>>    
>>>    	status = nfs_ok;
>>> -	/* Success! bump slot seqid */
>>> +	/* Success! accept new slot seqid */
>>>    	slot->sl_seqid = seq->seqid;
>>> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
>>>    	slot->sl_flags |= NFSD4_SLOT_INUSE;
>>> +	slot->sl_generation = session->se_slot_gen;
>>>    	if (seq->cachethis)
>>>    		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
>>>    	else
>>> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    	 * the client might use.
>>>    	 */
>>>    	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
>>> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
>>>    	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
>>>    		int s = session->se_fchannel.maxreqs;
>>>    		int cnt = DIV_ROUND_UP(s, 5);
>>> +		void *prev_slot;
>>>    
>>>    		do {
>>>    			/*
>>> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>>>    			 */
>>>    			slot = kzalloc(slot_bytes(&session->se_fchannel),
>>>    				       GFP_NOWAIT);
>>> +			prev_slot = xa_load(&session->se_slots, s);
>>> +			if (xa_is_value(prev_slot) && slot) {
>>> +				slot->sl_seqid = xa_to_value(prev_slot);
>>> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
>>> +			}
>>>    			if (slot &&
>>>    			    !xa_is_err(xa_store(&session->se_slots, s, slot,
>>>    						GFP_NOWAIT))) {
>>>    				s += 1;
>>>    				session->se_fchannel.maxreqs = s;
>>> +				session->se_target_maxslots = s;
>>>    			} else {
>>>    				kfree(slot);
>>>    				slot = NULL;
>>>    			}
>>>    		} while (slot && --cnt > 0);
>>>    	}
>>> -	seq->maxslots = session->se_fchannel.maxreqs;
>>> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
>>> +	seq->target_maxslots = session->se_target_maxslots;
>>>    
>>>    out:
>>>    	switch (clp->cl_cb_state) {
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 53fac037611c..4dcb03cd9292 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
>>>    		return nfserr_bad_xdr;
>>>    	seq->seqid = be32_to_cpup(p++);
>>>    	seq->slotid = be32_to_cpup(p++);
>>> -	seq->maxslots = be32_to_cpup(p++);
>>> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
>>> +	seq->maxslots = be32_to_cpup(p++) + 1;
>>>    	seq->cachethis = be32_to_cpup(p);
>>>    
>>>    	seq->status_flags = 0;
>>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_target_highest_slotid */
>>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>>    	if (nfserr != nfs_ok)
>>>    		return nfserr;
>>>    	/* sr_status_flags */
>>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
>>> index aad547d3ad8b..4251ff3c5ad1 100644
>>> --- a/fs/nfsd/state.h
>>> +++ b/fs/nfsd/state.h
>>> @@ -245,10 +245,12 @@ struct nfsd4_slot {
>>>    	struct svc_cred sl_cred;
>>>    	u32	sl_datalen;
>>>    	u16	sl_opcnt;
>>> +	u16	sl_generation;
>>>    #define NFSD4_SLOT_INUSE	(1 << 0)
>>>    #define NFSD4_SLOT_CACHETHIS	(1 << 1)
>>>    #define NFSD4_SLOT_INITIALIZED	(1 << 2)
>>>    #define NFSD4_SLOT_CACHED	(1 << 3)
>>> +#define NFSD4_SLOT_REUSED	(1 << 4)
>>>    	u8	sl_flags;
>>>    	char	sl_data[];
>>>    };
>>> @@ -321,7 +323,6 @@ struct nfsd4_session {
>>>    	u32			se_cb_slot_avail; /* bitmap of available slots */
>>>    	u32			se_cb_highest_slot;	/* highest slot client wants */
>>>    	u32			se_cb_prog;
>>> -	bool			se_dead;
>>>    	struct list_head	se_hash;	/* hash by sessionid */
>>>    	struct list_head	se_perclnt;
>>>    	struct nfs4_client	*se_client;
>>> @@ -331,6 +332,9 @@ struct nfsd4_session {
>>>    	struct list_head	se_conns;
>>>    	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
>>>    	struct xarray		se_slots;	/* forward channel slots */
>>> +	u16			se_slot_gen;
>>> +	bool			se_dead;
>>> +	u32			se_target_maxslots;
>>>    };
>>>    
>>>    /* formatted contents of nfs4_sessionid */
>>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
>>> index 382cc1389396..c26ba86dbdfd 100644
>>> --- a/fs/nfsd/xdr4.h
>>> +++ b/fs/nfsd/xdr4.h
>>> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
>>>    	u32			slotid;			/* request/response */
>>>    	u32			maxslots;		/* request/response */
>>>    	u32			cachethis;		/* request */
>>> -#if 0
>>>    	u32			target_maxslots;	/* response */
>>> -#endif /* not yet */
>>>    	u32			status_flags;		/* response */
>>>    };
>>>    
>>
>> Hi Neil -
>>
>> I've found some misbehavior which I've bisected to this commit.
> 
> Hi Chuck,
>   could you please confirm that it really was this commit that you
>   bisected to?  Not the next one?

It's this one. I included the hunk that introduces the misbehavior
below. It's when the server starts returning a different value for
target_highest_slotid in the SEQUENCE result. The target_highest
is 63 -- the number the server has in its slot table. The maxslots
value is smaller.

In the working case, these two values never differ.


>   Because this commit never reduces ->se_target_maxslots, so the
>   patch which you say removed the symptom should be a no-op.

It's not the reducing of target_highest that's the problem. Rather
it's that the target_highest and max in-use slot IDs are different
values for a brief period after a reconnect.

That triggers the client to think that the server has reduced its
slot table size, so the client shrinks its slot table. The server has
not actually shrunken it, however, so it continues to expect the client
to use the large slot sequence numbers for those slots.

When the client starts to use one of those slots again, it uses a
sequence number of 1, and that fails.


>   Even if it was the next commit I'm struggling to pin down the
>   problem.  Here is my current analysis - partly to ensure I can present
>   it clearly.
> 
>   The evidence suggests that the client has retired a slot that the
>   server hasn't.  This happens when nfs41_set_server_slotid_locked()
>   calls nfsd4_shrink_slot_table(), and nothing will happen if any slots
>   before the new limit are still in use.  If the server reduces
>   its idea of the target when the client isn't even using that many,
>   the slots can be freed immediately that the client gets a reply
>   indicating the new highest_slot number from the server.
> 
>   The server will not free these slots immediately but will wait to get a
>   confirmation from the client that it has accepted the new limit.  But,
>   importantly, the server will not increase the limit that it sends to
>   the client until after it has had a chance to free the retired slots.
>   If the server doesn't increase the limit, then the client won't try to
>   use the retired slots...
> 
>   Do you still have the network trace which shows the error?  Would I be
>   able to look at it?

Sending via WeTransfer.

But also, it's easy enough to reproduce.

Build your server with CONFIG_FAIL_SUNRPC set. Reboot into the new
kernel.

Before each test, run this script on the server:

#!/usr/bin/bash

cd /sys/kernel/debug/fail_sunrpc/

echo Y > ignore-cache-wait
echo Y > ignore-client-disconnect
echo 24847 > interval
echo 97 > times
echo 100 > probability

exit 0

On the client, run fstests with an NFSv4.1 mount. It will usually hang
within the first 15 tests.


>> If disconnect injection is set up to break the connection every 25,000
>> RPCs or so, xfstests running on an NFSv4.1 mount will eventually stall
>> after this commit is applied.
>>
>> Network capture shows that the server eventually starts returning
>> SEQ_MISORDERED because the client has forgotten a retired slot after a
>> disconnect, and tries to use sequence number 1 for that slot with a new
>> operation.
>>
>> I've narrowed the issue down to nfs41_is_outlier_target_slotid() on the
>> client. This function uses a bit of calculus to decide when to bump the
>> slot table's generation number. In the failing case, it never bumps the
>> generation, and that results in the client freeing slots it shouldn't.
>> The server's reported { highest, target_highest } slot numbers don't
>> appear to change correctly after the client has reconnected.
>>
>> If I revert this hunk from 5/6:
>>
>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres
>> *resp, __be32 nfserr,
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_target_highest_slotid */
>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
>>    	if (nfserr != nfs_ok)
>>    		return nfserr;
>>    	/* sr_status_flags */
>>
>> the reproducer above runs to completion in the expected amount of time.
>>
>>
>> The high order bit here is whether I should drop these patches for
>> v6.14, or whether you believe you can come up with a narrow solution
>> during the early part of v6.14-rc that I can include in an -rc update
>> for NFSD. I can't really tell if a significant amount of surgery will
>> be necessary.
>>
>> What do you think?
>>
>>
>> -- 
>> Chuck Lever
>>
> 


-- 
Chuck Lever

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots
  2025-01-27 13:57       ` Chuck Lever
@ 2025-01-27 22:57         ` NeilBrown
  0 siblings, 0 replies; 47+ messages in thread
From: NeilBrown @ 2025-01-27 22:57 UTC (permalink / raw)
  To: Chuck Lever
  Cc: linux-nfs, Olga Kornievskaia, Dai Ngo, Tom Talpey, Jeff Layton

On Tue, 28 Jan 2025, Chuck Lever wrote:
> On 1/26/25 11:08 PM, NeilBrown wrote:
> > On Sun, 19 Jan 2025, Chuck Lever wrote:
> >> On 12/11/24 4:47 PM, NeilBrown wrote:
> >>> Reducing the number of slots in the session slot table requires
> >>> confirmation from the client.  This patch adds reduce_session_slots()
> >>> which starts the process of getting confirmation, but never calls it.
> >>> That will come in a later patch.
> >>>
> >>> Before we can free a slot we need to confirm that the client won't try
> >>> to use it again.  This involves returning a lower cr_maxrequests in a
> >>> SEQUENCE reply and then seeing a ca_maxrequests on the same slot which
> >>> is not larger than the limit we are trying to impose.  So for each slot
> >>> we need to remember that we have sent a reduced cr_maxrequests.
> >>>
> >>> To achieve this we introduce a concept of request "generations".  Each
> >>> time we decide to reduce cr_maxrequests we increment the generation
> >>> number, and record this when we return the lower cr_maxrequests to the
> >>> client.  When a slot with the current generation reports a low
> >>> ca_maxrequests, we commit to that level and free extra slots.
> >>>
> >>> We use a 16-bit generation number (64 seems wasteful) and if it cycles
> >>> we iterate all slots and reset the generation number to avoid false matches.
> >>>
> >>> When we free a slot we store the seqid in the slot pointer so that it can
> >>> be restored when we reactivate the slot.  The RFC can be read as
> >>> suggesting that the slot number could restart from one after a slot is
> >>> retired and reactivated, but also suggests that retiring slots is not
> >>> required.  So when we reactivate a slot we accept either the next seqid in
> >>> sequence, or 1.
> >>>
> >>> When decoding sa_highest_slotid into maxslots we need to add 1 - this
> >>> matches how it is encoded for the reply.
> >>>
> >>> se_dead is moved in struct nfsd4_session to remove a hole.
> >>>
> >>> Reviewed-by: Jeff Layton <jlayton@kernel.org>
> >>> Signed-off-by: NeilBrown <neilb@suse.de>
> >>> ---
> >>>    fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
> >>>    fs/nfsd/nfs4xdr.c   |  5 ++-
> >>>    fs/nfsd/state.h     |  6 ++-
> >>>    fs/nfsd/xdr4.h      |  2 -
> >>>    4 files changed, 92 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> >>> index fd9473d487f3..a2d1f97b8a0e 100644
> >>> --- a/fs/nfsd/nfs4state.c
> >>> +++ b/fs/nfsd/nfs4state.c
> >>> @@ -1910,17 +1910,69 @@ gen_sessionid(struct nfsd4_session *ses)
> >>>    #define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)
> >>>    
> >>>    static void
> >>> -free_session_slots(struct nfsd4_session *ses)
> >>> +free_session_slots(struct nfsd4_session *ses, int from)
> >>>    {
> >>>    	int i;
> >>>    
> >>> -	for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> >>> +	if (from >= ses->se_fchannel.maxreqs)
> >>> +		return;
> >>> +
> >>> +	for (i = from; i < ses->se_fchannel.maxreqs; i++) {
> >>>    		struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >>>    
> >>> -		xa_erase(&ses->se_slots, i);
> >>> +		/*
> >>> +		 * Save the seqid in case we reactivate this slot.
> >>> +		 * This will never require a memory allocation so GFP
> >>> +		 * flag is irrelevant
> >>> +		 */
> >>> +		xa_store(&ses->se_slots, i, xa_mk_value(slot->sl_seqid), 0);
> >>>    		free_svc_cred(&slot->sl_cred);
> >>>    		kfree(slot);
> >>>    	}
> >>> +	ses->se_fchannel.maxreqs = from;
> >>> +	if (ses->se_target_maxslots > from)
> >>> +		ses->se_target_maxslots = from;
> >>> +}
> >>> +
> >>> +/**
> >>> + * reduce_session_slots - reduce the target max-slots of a session if possible
> >>> + * @ses:  The session to affect
> >>> + * @dec:  how much to decrease the target by
> >>> + *
> >>> + * This interface can be used by a shrinker to reduce the target max-slots
> >>> + * for a session so that some slots can eventually be freed.
> >>> + * It uses spin_trylock() as it may be called in a context where another
> >>> + * spinlock is held that has a dependency on client_lock.  As shrinkers are
> >>> + * best-effort, skiping a session is client_lock is already held has no
> >>> + * great coast
> >>> + *
> >>> + * Return value:
> >>> + *   The number of slots that the target was reduced by.
> >>> + */
> >>> +static int __maybe_unused
> >>> +reduce_session_slots(struct nfsd4_session *ses, int dec)
> >>> +{
> >>> +	struct nfsd_net *nn = net_generic(ses->se_client->net,
> >>> +					  nfsd_net_id);
> >>> +	int ret = 0;
> >>> +
> >>> +	if (ses->se_target_maxslots <= 1)
> >>> +		return ret;
> >>> +	if (!spin_trylock(&nn->client_lock))
> >>> +		return ret;
> >>> +	ret = min(dec, ses->se_target_maxslots-1);
> >>> +	ses->se_target_maxslots -= ret;
> >>> +	ses->se_slot_gen += 1;
> >>> +	if (ses->se_slot_gen == 0) {
> >>> +		int i;
> >>> +		ses->se_slot_gen = 1;
> >>> +		for (i = 0; i < ses->se_fchannel.maxreqs; i++) {
> >>> +			struct nfsd4_slot *slot = xa_load(&ses->se_slots, i);
> >>> +			slot->sl_generation = 0;
> >>> +		}
> >>> +	}
> >>> +	spin_unlock(&nn->client_lock);
> >>> +	return ret;
> >>>    }
> >>>    
> >>>    /*
> >>> @@ -1968,6 +2020,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
> >>>    	}
> >>>    	fattrs->maxreqs = i;
> >>>    	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
> >>> +	new->se_target_maxslots = i;
> >>>    	new->se_cb_slot_avail = ~0U;
> >>>    	new->se_cb_highest_slot = min(battrs->maxreqs - 1,
> >>>    				      NFSD_BC_SLOT_TABLE_SIZE - 1);
> >>> @@ -2081,7 +2134,7 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
> >>>    
> >>>    static void __free_session(struct nfsd4_session *ses)
> >>>    {
> >>> -	free_session_slots(ses);
> >>> +	free_session_slots(ses, 0);
> >>>    	xa_destroy(&ses->se_slots);
> >>>    	kfree(ses);
> >>>    }
> >>> @@ -2684,6 +2737,9 @@ static int client_info_show(struct seq_file *m, void *v)
> >>>    	seq_printf(m, "session slots:");
> >>>    	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >>>    		seq_printf(m, " %u", ses->se_fchannel.maxreqs);
> >>> +	seq_printf(m, "\nsession target slots:");
> >>> +	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
> >>> +		seq_printf(m, " %u", ses->se_target_maxslots);
> >>>    	spin_unlock(&clp->cl_lock);
> >>>    	seq_puts(m, "\n");
> >>>    
> >>> @@ -3674,10 +3730,10 @@ nfsd4_exchange_id_release(union nfsd4_op_u *u)
> >>>    	kfree(exid->server_impl_name);
> >>>    }
> >>>    
> >>> -static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >>> +static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, u8 flags)
> >>>    {
> >>>    	/* The slot is in use, and no response has been sent. */
> >>> -	if (slot_inuse) {
> >>> +	if (flags & NFSD4_SLOT_INUSE) {
> >>>    		if (seqid == slot_seqid)
> >>>    			return nfserr_jukebox;
> >>>    		else
> >>> @@ -3686,6 +3742,8 @@ static __be32 check_slot_seqid(u32 seqid, u32 slot_seqid, bool slot_inuse)
> >>>    	/* Note unsigned 32-bit arithmetic handles wraparound: */
> >>>    	if (likely(seqid == slot_seqid + 1))
> >>>    		return nfs_ok;
> >>> +	if ((flags & NFSD4_SLOT_REUSED) && seqid == 1)
> >>> +		return nfs_ok;
> >>>    	if (seqid == slot_seqid)
> >>>    		return nfserr_replay_cache;
> >>>    	return nfserr_seq_misordered;
> >>> @@ -4236,8 +4294,7 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	dprintk("%s: slotid %d\n", __func__, seq->slotid);
> >>>    
> >>>    	trace_nfsd_slot_seqid_sequence(clp, seq, slot);
> >>> -	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
> >>> -					slot->sl_flags & NFSD4_SLOT_INUSE);
> >>> +	status = check_slot_seqid(seq->seqid, slot->sl_seqid, slot->sl_flags);
> >>>    	if (status == nfserr_replay_cache) {
> >>>    		status = nfserr_seq_misordered;
> >>>    		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
> >>> @@ -4262,6 +4319,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	if (status)
> >>>    		goto out_put_session;
> >>>    
> >>> +	if (session->se_target_maxslots < session->se_fchannel.maxreqs &&
> >>> +	    slot->sl_generation == session->se_slot_gen &&
> >>> +	    seq->maxslots <= session->se_target_maxslots)
> >>> +		/* Client acknowledged our reduce maxreqs */
> >>> +		free_session_slots(session, session->se_target_maxslots);
> >>> +
> >>>    	buflen = (seq->cachethis) ?
> >>>    			session->se_fchannel.maxresp_cached :
> >>>    			session->se_fchannel.maxresp_sz;
> >>> @@ -4272,9 +4335,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	svc_reserve(rqstp, buflen);
> >>>    
> >>>    	status = nfs_ok;
> >>> -	/* Success! bump slot seqid */
> >>> +	/* Success! accept new slot seqid */
> >>>    	slot->sl_seqid = seq->seqid;
> >>> +	slot->sl_flags &= ~NFSD4_SLOT_REUSED;
> >>>    	slot->sl_flags |= NFSD4_SLOT_INUSE;
> >>> +	slot->sl_generation = session->se_slot_gen;
> >>>    	if (seq->cachethis)
> >>>    		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
> >>>    	else
> >>> @@ -4291,9 +4356,11 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    	 * the client might use.
> >>>    	 */
> >>>    	if (seq->slotid == session->se_fchannel.maxreqs - 1 &&
> >>> +	    session->se_target_maxslots >= session->se_fchannel.maxreqs &&
> >>>    	    session->se_fchannel.maxreqs < NFSD_MAX_SLOTS_PER_SESSION) {
> >>>    		int s = session->se_fchannel.maxreqs;
> >>>    		int cnt = DIV_ROUND_UP(s, 5);
> >>> +		void *prev_slot;
> >>>    
> >>>    		do {
> >>>    			/*
> >>> @@ -4303,18 +4370,25 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >>>    			 */
> >>>    			slot = kzalloc(slot_bytes(&session->se_fchannel),
> >>>    				       GFP_NOWAIT);
> >>> +			prev_slot = xa_load(&session->se_slots, s);
> >>> +			if (xa_is_value(prev_slot) && slot) {
> >>> +				slot->sl_seqid = xa_to_value(prev_slot);
> >>> +				slot->sl_flags |= NFSD4_SLOT_REUSED;
> >>> +			}
> >>>    			if (slot &&
> >>>    			    !xa_is_err(xa_store(&session->se_slots, s, slot,
> >>>    						GFP_NOWAIT))) {
> >>>    				s += 1;
> >>>    				session->se_fchannel.maxreqs = s;
> >>> +				session->se_target_maxslots = s;
> >>>    			} else {
> >>>    				kfree(slot);
> >>>    				slot = NULL;
> >>>    			}
> >>>    		} while (slot && --cnt > 0);
> >>>    	}
> >>> -	seq->maxslots = session->se_fchannel.maxreqs;
> >>> +	seq->maxslots = max(session->se_target_maxslots, seq->maxslots);
> >>> +	seq->target_maxslots = session->se_target_maxslots;
> >>>    
> >>>    out:
> >>>    	switch (clp->cl_cb_state) {
> >>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> >>> index 53fac037611c..4dcb03cd9292 100644
> >>> --- a/fs/nfsd/nfs4xdr.c
> >>> +++ b/fs/nfsd/nfs4xdr.c
> >>> @@ -1884,7 +1884,8 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
> >>>    		return nfserr_bad_xdr;
> >>>    	seq->seqid = be32_to_cpup(p++);
> >>>    	seq->slotid = be32_to_cpup(p++);
> >>> -	seq->maxslots = be32_to_cpup(p++);
> >>> +	/* sa_highest_slotid counts from 0 but maxslots  counts from 1 ... */
> >>> +	seq->maxslots = be32_to_cpup(p++) + 1;
> >>>    	seq->cachethis = be32_to_cpup(p);
> >>>    
> >>>    	seq->status_flags = 0;
> >>> @@ -4968,7 +4969,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
> >>>    	if (nfserr != nfs_ok)
> >>>    		return nfserr;
> >>>    	/* sr_target_highest_slotid */
> >>> -	nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1);
> >>> +	nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1);
> >>>    	if (nfserr != nfs_ok)
> >>>    		return nfserr;
> >>>    	/* sr_status_flags */
> >>> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> >>> index aad547d3ad8b..4251ff3c5ad1 100644
> >>> --- a/fs/nfsd/state.h
> >>> +++ b/fs/nfsd/state.h
> >>> @@ -245,10 +245,12 @@ struct nfsd4_slot {
> >>>    	struct svc_cred sl_cred;
> >>>    	u32	sl_datalen;
> >>>    	u16	sl_opcnt;
> >>> +	u16	sl_generation;
> >>>    #define NFSD4_SLOT_INUSE	(1 << 0)
> >>>    #define NFSD4_SLOT_CACHETHIS	(1 << 1)
> >>>    #define NFSD4_SLOT_INITIALIZED	(1 << 2)
> >>>    #define NFSD4_SLOT_CACHED	(1 << 3)
> >>> +#define NFSD4_SLOT_REUSED	(1 << 4)
> >>>    	u8	sl_flags;
> >>>    	char	sl_data[];
> >>>    };
> >>> @@ -321,7 +323,6 @@ struct nfsd4_session {
> >>>    	u32			se_cb_slot_avail; /* bitmap of available slots */
> >>>    	u32			se_cb_highest_slot;	/* highest slot client wants */
> >>>    	u32			se_cb_prog;
> >>> -	bool			se_dead;
> >>>    	struct list_head	se_hash;	/* hash by sessionid */
> >>>    	struct list_head	se_perclnt;
> >>>    	struct nfs4_client	*se_client;
> >>> @@ -331,6 +332,9 @@ struct nfsd4_session {
> >>>    	struct list_head	se_conns;
> >>>    	u32			se_cb_seq_nr[NFSD_BC_SLOT_TABLE_SIZE];
> >>>    	struct xarray		se_slots;	/* forward channel slots */
> >>> +	u16			se_slot_gen;
> >>> +	bool			se_dead;
> >>> +	u32			se_target_maxslots;
> >>>    };
> >>>    
> >>>    /* formatted contents of nfs4_sessionid */
> >>> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> >>> index 382cc1389396..c26ba86dbdfd 100644
> >>> --- a/fs/nfsd/xdr4.h
> >>> +++ b/fs/nfsd/xdr4.h
> >>> @@ -576,9 +576,7 @@ struct nfsd4_sequence {
> >>>    	u32			slotid;			/* request/response */
> >>>    	u32			maxslots;		/* request/response */
> >>>    	u32			cachethis;		/* request */
> >>> -#if 0
> >>>    	u32			target_maxslots;	/* response */
> >>> -#endif /* not yet */
> >>>    	u32			status_flags;		/* response */
> >>>    };
> >>>    
> >>
> >> Hi Neil -
> >>
> >> I've found some misbehavior which I've bisected to this commit.
> > 
> > Hi Chuck,
> >   could you please confirm that it really was this commit that you
> >   bisected to?  Not the next one?
> 
> It's this one. I included the hunk that introduces the misbehavior
> below. It's when the server starts returning a different value for
> target_highest_slotid in the SEQUENCE result. The target_highest
> is 63 -- the number the server has in its slot table. The maxslots
> value is smaller.
> 
> In the working case, these two values never differ.
> 
> 
> >   Because this commit never reduces ->se_target_maxslots, so the
> >   patch which you say removed the symptom should be a no-op.
> 
> It's not the reducing of target_highest that's the problem. Rather
> it's that the target_highest and max in-use slot IDs are different
> values for a brief period after a reconnect.
> 
> That triggers the client to think that the server has reduced its
> slot table size, so the client shrinks its slot table. The server has
> not actually shrunken it, however, so it continues to expect the client
> to use the large slot sequence numbers for those slots.
> 
> When the client starts to use one of those slots again, it uses a
> sequence number of 1, and that fails.
> 
> 
> >   Even if it was the next commit I'm struggling to pin down the
> >   problem.  Here is my current analysis - partly to ensure I can present
> >   it clearly.
> > 
> >   The evidence suggests that the client has retired a slot that the
> >   server hasn't.  This happens when nfs41_set_server_slotid_locked()
> >   calls nfsd4_shrink_slot_table(), and nothing will happen if any slots
> >   before the new limit are still in use.  If the server reduces
> >   its idea of the target when the client isn't even using that many,
> >   the slots can be freed immediately that the client gets a reply
> >   indicating the new highest_slot number from the server.
> > 
> >   The server will not free these slots immediately but will wait to get a
> >   confirmation from the client that it has accepted the new limit.  But,
> >   importantly, the server will not increase the limit that it sends to
> >   the client until after it has had a chance to free the retired slots.
> >   If the server doesn't increase the limit, then the client won't try to
> >   use the retired slots...
> > 
> >   Do you still have the network trace which shows the error?  Would I be
> >   able to look at it?
> 
> Sending via WeTransfer.

Thanks.  I see the problem clearly now.  It isn't so much that 'high
slot' and 'target' are different, it is that they are both wrong.
'highslot' (which is the max the server will accept) is typically 0 or
4, and 'target' is 63.

The 'highest slot' has been copied from what the client said was the max
currently in use.  I don't know where the 'target' came from, maybe from
the previous reply sent.

This bug was introduced in 
   nfsd: allocate new session-based DRC slots on demand.
I should have moved the seq->maxslots assignment after the "out:", not
before.

I'll send a patch.


> 
> But also, it's easy enough to reproduce.
> 
> Build your server with CONFIG_FAIL_SUNRPC set. Reboot into the new
> kernel.
> 
> Before each test, run this script on the server:
> 
> #!/usr/bin/bash
> 
> cd /sys/kernel/debug/fail_sunrpc/
> 
> echo Y > ignore-cache-wait
> echo Y > ignore-client-disconnect
> echo 24847 > interval
> echo 97 > times
> echo 100 > probability
> 
> exit 0
> 
> On the client, run fstests with an NFSv4.1 mount. It will usually hang
> within the first 15 tests.
> 

I might give that a try - thanks.

NeilBrown

^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2025-01-27 22:57 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-11-19  0:41 [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-11-19  0:41 ` [PATCH 1/6] nfsd: use an xarray to store v4.1 session slots NeilBrown
2024-11-19  0:41 ` [PATCH 2/6] nfsd: remove artificial limits on the session-based DRC NeilBrown
2024-11-19  0:41 ` [PATCH 3/6] nfsd: add session slot count to /proc/fs/nfsd/clients/*/info NeilBrown
2024-11-19 19:14   ` Chuck Lever
2024-11-19 22:22     ` NeilBrown
2024-11-20  0:21       ` Chuck Lever
2024-11-19 19:21   ` Chuck Lever
2024-11-19 22:24     ` NeilBrown
2024-11-20  0:25       ` Chuck Lever
2024-11-21 21:03         ` NeilBrown
2024-11-21 21:24           ` Chuck Lever III
2024-11-19  0:41 ` [PATCH 4/6] nfsd: allocate new session-based DRC slots on demand NeilBrown
2024-11-19 19:20   ` Chuck Lever
2024-11-19 22:27     ` NeilBrown
2024-11-20  0:32       ` Chuck Lever
2024-11-21 21:20         ` NeilBrown
2024-11-19 19:34   ` Jeff Layton
2024-11-19  0:41 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2024-11-19 19:25   ` Chuck Lever
2024-11-19 22:35     ` NeilBrown
2024-11-20  1:27       ` Chuck Lever
2024-11-21 21:47         ` NeilBrown
2024-11-21 22:29           ` Chuck Lever III
2024-12-02 16:11             ` Chuck Lever III
2024-12-03  4:28               ` NeilBrown
2024-12-03 14:40                 ` Chuck Lever III
2024-11-19 19:48   ` Jeff Layton
2024-11-19  0:41 ` [PATCH 6/6] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
2024-11-19 19:28   ` Chuck Lever
2024-11-19 22:41     ` NeilBrown
2024-11-19 21:17   ` Jeff Layton
2024-11-19 22:47     ` NeilBrown
2024-11-19 21:31 ` [PATCH 0/6 RFC v2] nfsd: allocate/free session-based DRC slots on demand Jeff Layton
2024-11-19 22:52   ` NeilBrown
  -- strict thread matches above, loose matches on Subject: below --
2024-12-06  0:43 [PATCH 0/6 v3] " NeilBrown
2024-12-06  0:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2024-12-06  5:30   ` Jeff Layton
2024-12-06  6:05     ` NeilBrown
2024-12-06 13:59       ` Jeff Layton
2024-12-08 22:43 [PATCH 0/6 v4] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-12-08 22:43 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2024-12-11 21:47 [PATCH 0/6 v5] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-12-11 21:47 ` [PATCH 5/6] nfsd: add support for freeing unused session-DRC slots NeilBrown
2025-01-19  2:01   ` Chuck Lever
2025-01-21  2:36     ` NeilBrown
2025-01-21 16:24       ` Chuck Lever
2025-01-27  4:08     ` NeilBrown
2025-01-27 13:57       ` Chuck Lever
2025-01-27 22:57         ` NeilBrown

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox