Re: [PATCH 1/4] nfsd: remove artificial limits on the session-based DRC

public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed

From: Jeff Layton <jlayton@kernel.org>
To: NeilBrown <neilb@suse.de>, Chuck Lever <chuck.lever@oracle.com>
Cc: linux-nfs@vger.kernel.org,
	Olga Kornievskaia <okorniev@redhat.com>,
	Dai Ngo <Dai.Ngo@oracle.com>, Tom Talpey <tom@talpey.com>
Subject: Re: [PATCH 1/4] nfsd: remove artificial limits on the session-based DRC
Date: Wed, 13 Nov 2024 11:48:38 -0500	[thread overview]
Message-ID: <1db227ccb347c7877998ab20a609bc3a9896c7db.camel@kernel.org> (raw)
In-Reply-To: <20241113055345.494856-2-neilb@suse.de>

On Wed, 2024-11-13 at 16:38 +1100, NeilBrown wrote:
> Rather than guessing how much space it might be safe to use for the DRC,
> simply try allocating slots and be prepared to accept failure.
> 
> The first slot for each session is allocated with GFP_KERNEL which is
> unlikely to fail.  Subsequent slots are allocated with the addition of
> __GFP_NORETRY which is expected to fail if there isn't much free memory.
> 
> This is probably too aggressive but clears the way for adding a
> shrinker interface to free extra slots when memory is tight.
> 
> Also *always* allow NFSD_MAX_SLOTS_PER_SESSION slot pointers per
> session.  This allows the session to be allocated before we know how
> many slots we will successfully allocate, and will be useful when we
> starting dynamically increasing the number of allocated slots.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/nfsd/nfs4state.c | 91 +++++++--------------------------------------
>  fs/nfsd/nfsd.h      |  3 --
>  fs/nfsd/nfssvc.c    | 32 ----------------
>  fs/nfsd/state.h     |  2 +-
>  4 files changed, 14 insertions(+), 114 deletions(-)
> 
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 551d2958ec29..2dcba0c83c10 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -1935,59 +1935,6 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
>  	return size + sizeof(struct nfsd4_slot);
>  }
>  
> -/*
> - * XXX: If we run out of reserved DRC memory we could (up to a point)
> - * re-negotiate active sessions and reduce their slot usage to make
> - * room for new connections. For now we just fail the create session.
> - */
> -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
> -{
> -	u32 slotsize = slot_bytes(ca);
> -	u32 num = ca->maxreqs;
> -	unsigned long avail, total_avail;
> -	unsigned int scale_factor;
> -
> -	spin_lock(&nfsd_drc_lock);
> -	if (nfsd_drc_max_mem > nfsd_drc_mem_used)
> -		total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used;
> -	else
> -		/* We have handed out more space than we chose in
> -		 * set_max_drc() to allow.  That isn't really a
> -		 * problem as long as that doesn't make us think we
> -		 * have lots more due to integer overflow.
> -		 */
> -		total_avail = 0;
> -	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail);
> -	/*
> -	 * Never use more than a fraction of the remaining memory,
> -	 * unless it's the only way to give this client a slot.
> -	 * The chosen fraction is either 1/8 or 1/number of threads,
> -	 * whichever is smaller.  This ensures there are adequate
> -	 * slots to support multiple clients per thread.
> -	 * Give the client one slot even if that would require
> -	 * over-allocation--it is better than failure.
> -	 */
> -	scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads);
> -
> -	avail = clamp_t(unsigned long, avail, slotsize,
> -			total_avail/scale_factor);
> -	num = min_t(int, num, avail / slotsize);
> -	num = max_t(int, num, 1);
> -	nfsd_drc_mem_used += num * slotsize;
> -	spin_unlock(&nfsd_drc_lock);
> -
> -	return num;
> -}
> -
> -static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
> -{
> -	int slotsize = slot_bytes(ca);
> -
> -	spin_lock(&nfsd_drc_lock);
> -	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
> -	spin_unlock(&nfsd_drc_lock);
> -}
> -
>  static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  					   struct nfsd4_channel_attrs *battrs)
>  {
> @@ -1996,26 +1943,27 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
>  	struct nfsd4_session *new;
>  	int i;
>  
> -	BUILD_BUG_ON(struct_size(new, se_slots, NFSD_MAX_SLOTS_PER_SESSION)
> -		     > PAGE_SIZE);
> +	BUILD_BUG_ON(sizeof(new) > PAGE_SIZE);
>  
> -	new = kzalloc(struct_size(new, se_slots, numslots), GFP_KERNEL);
> +	new = kzalloc(sizeof(*new), GFP_KERNEL);
>  	if (!new)
>  		return NULL;
>  	/* allocate each struct nfsd4_slot and data cache in one piece */
> -	for (i = 0; i < numslots; i++) {
> -		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
> +	new->se_slots[0] = kzalloc(slotsize, GFP_KERNEL);
> +	if (!new->se_slots[0])
> +		goto out_free;
> +
> +	for (i = 1; i < numslots; i++) {
> +		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL | __GFP_NORETRY);
>  		if (!new->se_slots[i])
> -			goto out_free;
> +			break;
>  	}
> -
> +	fattrs->maxreqs = i;
>  	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
>  	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));
>  
>  	return new;
>  out_free:
> -	while (i--)
> -		kfree(new->se_slots[i]);
>  	kfree(new);
>  	return NULL;
>  }
> @@ -2128,7 +2076,6 @@ static void __free_session(struct nfsd4_session *ses)
>  static void free_session(struct nfsd4_session *ses)
>  {
>  	nfsd4_del_conns(ses);
> -	nfsd4_put_drc_mem(&ses->se_fchannel);
>  	__free_session(ses);
>  }
>  
> @@ -3748,17 +3695,6 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
>  	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
>  			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
>  	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
> -	/*
> -	 * Note decreasing slot size below client's request may make it
> -	 * difficult for client to function correctly, whereas
> -	 * decreasing the number of slots will (just?) affect
> -	 * performance.  When short on memory we therefore prefer to
> -	 * decrease number of slots instead of their size.  Clients that
> -	 * request larger slots than they need will get poor results:
> -	 * Note that we always allow at least one slot, because our
> -	 * accounting is soft and provides no guarantees either way.
> -	 */
> -	ca->maxreqs = nfsd4_get_drc_mem(ca, nn);
>  
>  	return nfs_ok;
>  }
> @@ -3836,11 +3772,11 @@ nfsd4_create_session(struct svc_rqst *rqstp,
>  		return status;
>  	status = check_backchannel_attrs(&cr_ses->back_channel);
>  	if (status)
> -		goto out_release_drc_mem;
> +		goto out_err;
>  	status = nfserr_jukebox;
>  	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
>  	if (!new)
> -		goto out_release_drc_mem;
> +		goto out_err;
>  	conn = alloc_conn_from_crses(rqstp, cr_ses);
>  	if (!conn)
>  		goto out_free_session;
> @@ -3950,8 +3886,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
>  		expire_client(old);
>  out_free_session:
>  	__free_session(new);
> -out_release_drc_mem:
> -	nfsd4_put_drc_mem(&cr_ses->fore_channel);
> +out_err:
>  	return status;
>  }
>  
> diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
> index 4b56ba1e8e48..3eb21e63b921 100644
> --- a/fs/nfsd/nfsd.h
> +++ b/fs/nfsd/nfsd.h
> @@ -88,9 +88,6 @@ struct nfsd_genl_rqstp {
>  extern struct svc_program	nfsd_programs[];
>  extern const struct svc_version	nfsd_version2, nfsd_version3, nfsd_version4;
>  extern struct mutex		nfsd_mutex;
> -extern spinlock_t		nfsd_drc_lock;
> -extern unsigned long		nfsd_drc_max_mem;
> -extern unsigned long		nfsd_drc_mem_used;
>  extern atomic_t			nfsd_th_cnt;		/* number of available threads */
>  
>  extern const struct seq_operations nfs_exports_op;
> diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
> index 49e2f32102ab..3dbaefc96608 100644
> --- a/fs/nfsd/nfssvc.c
> +++ b/fs/nfsd/nfssvc.c
> @@ -70,16 +70,6 @@ static __be32			nfsd_init_request(struct svc_rqst *,
>   */
>  DEFINE_MUTEX(nfsd_mutex);
>  
> -/*
> - * nfsd_drc_lock protects nfsd_drc_max_pages and nfsd_drc_pages_used.
> - * nfsd_drc_max_pages limits the total amount of memory available for
> - * version 4.1 DRC caches.
> - * nfsd_drc_pages_used tracks the current version 4.1 DRC memory usage.
> - */
> -DEFINE_SPINLOCK(nfsd_drc_lock);
> -unsigned long	nfsd_drc_max_mem;
> -unsigned long	nfsd_drc_mem_used;
> -
>  #if IS_ENABLED(CONFIG_NFS_LOCALIO)
>  static const struct svc_version *localio_versions[] = {
>  	[1] = &localio_version1,
> @@ -575,27 +565,6 @@ void nfsd_reset_versions(struct nfsd_net *nn)
>  		}
>  }
>  
> -/*
> - * Each session guarantees a negotiated per slot memory cache for replies
> - * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
> - * NFSv4.1 server might want to use more memory for a DRC than a machine
> - * with mutiple services.
> - *
> - * Impose a hard limit on the number of pages for the DRC which varies
> - * according to the machines free pages. This is of course only a default.
> - *
> - * For now this is a #defined shift which could be under admin control
> - * in the future.
> - */
> -static void set_max_drc(void)
> -{
> -	#define NFSD_DRC_SIZE_SHIFT	7
> -	nfsd_drc_max_mem = (nr_free_buffer_pages()
> -					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
> -	nfsd_drc_mem_used = 0;
> -	dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem);
> -}
> -
>  static int nfsd_get_default_max_blksize(void)
>  {
>  	struct sysinfo i;
> @@ -678,7 +647,6 @@ int nfsd_create_serv(struct net *net)
>  	nn->nfsd_serv = serv;
>  	spin_unlock(&nfsd_notifier_lock);
>  
> -	set_max_drc();
>  	/* check if the notifier is already set */
>  	if (atomic_inc_return(&nfsd_notifier_refcount) == 1) {
>  		register_inetaddr_notifier(&nfsd_inetaddr_notifier);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 35b3564c065f..c052e9eea81b 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -310,7 +310,7 @@ struct nfsd4_session {
>  	struct list_head	se_conns;
>  	u32			se_cb_prog;
>  	u32			se_cb_seq_nr;
> -	struct nfsd4_slot	*se_slots[];	/* forward channel slots */
> +	struct nfsd4_slot	*se_slots[NFSD_MAX_SLOTS_PER_SESSION];	/* forward channel slots */
>  };
>  
>  /* formatted contents of nfs4_sessionid */

I like this patch overall. One thing we could consider is allocating
slots aggressively with GFP_KERNEL up to a particular threshold, and
then switch to GFP_NOWAIT, but this seems like a reasonable place to
start.
 
-- 
Jeff Layton <jlayton@kernel.org>

next prev parent reply	other threads:[~2024-11-13 16:48 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-13  5:38 [PATCH 0/4 RFC] nfsd: allocate/free session-based DRC slots on demand NeilBrown
2024-11-13  5:38 ` [PATCH 1/4] nfsd: remove artificial limits on the session-based DRC NeilBrown
2024-11-13 16:48   ` Jeff Layton [this message]
2024-11-15  4:01     ` NeilBrown
2024-11-15 14:07       ` Jeff Layton
2024-11-13  5:38 ` [PATCH 2/4] nfsd: allocate new session-based DRC slots on demand NeilBrown
2024-11-13  5:38 ` [PATCH 3/4] nfsd: free unused session-DRC slots NeilBrown
2024-11-13 16:52   ` Jeff Layton
2024-11-13  5:38 ` [PATCH 4/4] nfsd: add shrinker to reduce number of slots allocated per session NeilBrown
2024-11-13 11:23 ` [PATCH 0/4 RFC] nfsd: allocate/free session-based DRC slots on demand Daire Byrne
2024-11-15  4:56   ` NeilBrown
2024-11-15 14:18     ` Chuck Lever
2024-11-18 15:43     ` Daire Byrne
2024-11-13 14:48 ` Chuck Lever III
2024-11-15  9:03   ` NeilBrown
2024-11-15 14:27     ` Chuck Lever

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1db227ccb347c7877998ab20a609bc3a9896c7db.camel@kernel.org \
    --to=jlayton@kernel.org \
    --cc=Dai.Ngo@oracle.com \
    --cc=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=okorniev@redhat.com \
    --cc=tom@talpey.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox