Linux NFS development
 help / color / mirror / Atom feed
From: Dai Ngo <dai.ngo@oracle.com>
To: chuck.lever@oracle.com
Cc: linux-nfs@vger.kernel.org
Subject: [PATCH v2 2/2] NFSD: handling memory shortage condition with Courteous server.
Date: Mon,  4 Jul 2022 12:05:43 -0700	[thread overview]
Message-ID: <1656961543-25210-3-git-send-email-dai.ngo@oracle.com> (raw)
In-Reply-To: <1656961543-25210-1-git-send-email-dai.ngo@oracle.com>

Currently the idle timeout for a courtesy client is fixed at 1 day. If
lots of courtesy clients remain in the system, they can cause a memory
shortage that affects the operation of other modules in the kernel.
This problem can be observed by running the pynfs nfs4.0 CID5 test in a
loop. Eventually the system runs out of memory and rpc.gssd fails to
add a new watch:

rpc.gssd[3851]: ERROR: inotify_add_watch failed for nfsd4_cb/clnt6c2e:
                No space left on device

and alloc_inode also fails with an out-of-memory condition:

Call Trace:
<TASK>
        dump_stack_lvl+0x33/0x42
        dump_header+0x4a/0x1ed
        oom_kill_process+0x80/0x10d
        out_of_memory+0x237/0x25f
        __alloc_pages_slowpath.constprop.0+0x617/0x7b6
        __alloc_pages+0x132/0x1e3
        alloc_slab_page+0x15/0x33
        allocate_slab+0x78/0x1ab
        ? alloc_inode+0x38/0x8d
        ___slab_alloc+0x2af/0x373
        ? alloc_inode+0x38/0x8d
        ? slab_pre_alloc_hook.constprop.0+0x9f/0x158
        ? alloc_inode+0x38/0x8d
        __slab_alloc.constprop.0+0x1c/0x24
        kmem_cache_alloc_lru+0x8c/0x142
        alloc_inode+0x38/0x8d
        iget_locked+0x60/0x126
        kernfs_get_inode+0x18/0x105
        kernfs_iop_lookup+0x6d/0xbc
        __lookup_slow+0xb7/0xf9
        lookup_slow+0x3a/0x52
        walk_component+0x90/0x100
        ? inode_permission+0x87/0x128
        link_path_walk.part.0.constprop.0+0x266/0x2ea
        ? path_init+0x101/0x2f2
        path_lookupat+0x4c/0xfa
        filename_lookup+0x63/0xd7
        ? getname_flags+0x32/0x17a
        ? kmem_cache_alloc+0x11f/0x144
        ? getname_flags+0x16c/0x17a
        user_path_at_empty+0x37/0x4b
        do_readlinkat+0x61/0x102
        __x64_sys_readlinkat+0x18/0x1b
        do_syscall_64+0x57/0x72
        entry_SYSCALL_64_after_hwframe+0x46/0xb0

This patch addresses this problem by:

   . removing the fixed 1-day idle time limit for courtesy clients.
     A courtesy client is now allowed to remain valid as long as the
     available system memory is above 80%.

   . when available system memory drops below 80%, the laundromat
     starts trimming older courtesy clients. The number of courtesy
     clients to trim is a percentage of the total number of courtesy
     clients that exist in the system.  This percentage is computed
     based on the current percentage of available system memory.

   . the percentage of courtesy clients to be trimmed is taken
     from this table:

     ----------------------------------
     |  % memory | % courtesy clients |
     | available |    to trim         |
     ----------------------------------
     |  > 80     |      0             |
     |  > 70     |     10             |
     |  > 60     |     20             |
     |  > 50     |     40             |
     |  > 40     |     60             |
     |  > 30     |     80             |
     |  < 30     |    100             |
     ----------------------------------

   . due to the overhead associated with removing a client record,
     there is a limit of 128 clients to be trimmed in each
     laundromat run. This is done to prevent the laundromat from
     spending too long destroying clients and failing to perform
     its other tasks in a timely manner.

   . the laundromat is scheduled to run sooner if more courtesy
     clients still need to be destroyed.

Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
---
 fs/nfsd/nfs4state.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 6 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index a34ffb0d8c77..c9d3955976b9 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -5794,14 +5794,50 @@ nfs4_anylock_blockers(struct nfs4_client *clp)
 	return false;
 }
 
+/*
+ * percentage of the number of courtesy clients to
+ * trim for a given percentage of available memory.
+ */
+static unsigned char avail_mem_to_trim_perc[11] = {
+	/*  0% */		100,
+	/* 10% */		100,
+	/* 20% */		100,
+	/* 30% */		80,
+	/* 40% */		60,
+	/* 50% */		40,
+	/* 60% */		20,
+	/* 70% */		10,
+	/* 80%, 90%, 100% */	0, 0, 0
+};
+#define	NFSD_COURTESY_CLIENT_MAX_TRIM_PER_RUN	128
+
+static unsigned int
+nfs4_get_maxreap(struct nfsd_net *nn)
+{
+	unsigned int clnts, avail;
+	struct sysinfo si;
+
+	si_meminfo(&si);
+	avail = ((si.freeram * 100) / (si.totalram - si.totalhigh) / 10);
+	if (!avail_mem_to_trim_perc[avail])
+		return 0;
+	clnts = atomic_read(&courtesy_client_count);
+	return min_t(unsigned int,
+		((clnts * avail_mem_to_trim_perc[avail]) / 100),
+		NFSD_COURTESY_CLIENT_MAX_TRIM_PER_RUN);
+}
+
 static void
 nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
 				struct laundry_time *lt)
 {
+	unsigned int maxreap, oldstate;
+	int reapcnt = 0;
 	struct list_head *pos, *next;
 	struct nfs4_client *clp;
 
 	INIT_LIST_HEAD(reaplist);
+	maxreap = nfs4_get_maxreap(nn);
 	spin_lock(&nn->client_lock);
 	list_for_each_safe(pos, next, &nn->client_lru) {
 		clp = list_entry(pos, struct nfs4_client, cl_lru);
@@ -5810,21 +5846,31 @@ nfs4_get_client_reaplist(struct nfsd_net *nn, struct list_head *reaplist,
 		if (!state_expired(lt, clp->cl_time))
 			break;
 		if (!atomic_read(&clp->cl_rpc_users)) {
-			if (xchg(&clp->cl_state, NFSD4_COURTESY) ==
-							NFSD4_ACTIVE)
+			oldstate = xchg(&clp->cl_state, NFSD4_COURTESY);
+			if (oldstate == NFSD4_ACTIVE)
 				atomic_inc(&courtesy_client_count);
 		}
-		if (!client_has_state(clp) ||
-				ktime_get_boottime_seconds() >=
-				(clp->cl_time + NFSD_COURTESY_CLIENT_TIMEOUT))
+		if (!client_has_state(clp))
 			goto exp_client;
 		if (nfs4_anylock_blockers(clp)) {
 exp_client:
-			if (!mark_client_expired_locked(clp))
+			if (!mark_client_expired_locked(clp)) {
 				list_add(&clp->cl_lru, reaplist);
+				reapcnt++;
+			}
+		} else {
+			/* expired client has state with no blocker */
+			if (oldstate != NFSD4_ACTIVE &&
+					(maxreap && reapcnt < maxreap))
+				goto exp_client;
 		}
 	}
 	spin_unlock(&nn->client_lock);
+
+	if (reapcnt == NFSD_COURTESY_CLIENT_MAX_TRIM_PER_RUN &&
+		atomic_read(&courtesy_client_count) >
+			NFSD_COURTESY_CLIENT_MAX_TRIM_PER_RUN)
+		lt->new_timeo = NFSD_LAUNDROMAT_MINTIMEOUT;
 }
 
 static time64_t
-- 
2.9.5


  parent reply	other threads:[~2022-07-04 19:05 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-04 19:05 [PATCH v2 0/2] NFSD: handling memory shortage problem with Courteous server Dai Ngo
2022-07-04 19:05 ` [PATCH v2 1/2] NFSD: keep track of the number of courtesy clients in the system Dai Ngo
2022-07-04 19:05 ` Dai Ngo [this message]
2022-07-05 14:50 ` [PATCH v2 0/2] NFSD: handling memory shortage problem with Courteous server Chuck Lever III
2022-07-05 18:42   ` dai.ngo
2022-07-05 19:08     ` Chuck Lever III
2022-07-06 15:46       ` J. Bruce Fields
2022-07-06 16:04         ` Chuck Lever III
2022-07-05 18:48   ` Jeff Layton
2022-07-05 19:15     ` Chuck Lever III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1656961543-25210-3-git-send-email-dai.ngo@oracle.com \
    --to=dai.ngo@oracle.com \
    --cc=chuck.lever@oracle.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox