public inbox for linux-ext4@vger.kernel.org
 help / color / mirror / Atom feed
From: "Darrick J. Wong" <djwong@kernel.org>
To: tytso@mit.edu
Cc: John@groves.net, bernd@bsbernd.com,
	linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org,
	miklos@szeredi.hu, amir73il@gmail.com, joannelkoong@gmail.com,
	neal@gompa.dev
Subject: [PATCH 16/20] cache: implement automatic shrinking
Date: Wed, 20 Aug 2025 18:12:02 -0700	[thread overview]
Message-ID: <175573713095.20753.14080863399069888459.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <175573712721.20753.5223489399594191991.stgit@frogsfrogsfrogs>

From: Darrick J. Wong <djwong@kernel.org>

Shrink the cache whenever maxcount has been expanded beyond its initial
value, we release a cached object to one of the mru lists and the number
of objects sitting on the mru is enough to drop the cache count down a
level.  This enables a cache to reduce its memory consumption after a
spike in which reclamation wasn't possible.

Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
 lib/support/cache.h |   17 ++++++-
 lib/support/cache.c |  118 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 126 insertions(+), 9 deletions(-)


diff --git a/lib/support/cache.h b/lib/support/cache.h
index ae37945c545f46..cd738b6cd3a460 100644
--- a/lib/support/cache.h
+++ b/lib/support/cache.h
@@ -16,6 +16,9 @@
  */
 #define CACHE_MISCOMPARE_PURGE	(1 << 0)
 
+/* Automatically shrink the cache's max_count when possible. */
+#define CACHE_CAN_SHRINK	(1U << 1)
+
 /*
  * cache object campare return values
  */
@@ -67,12 +70,18 @@ typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *);
 typedef int (*cache_node_get_t)(struct cache *c, struct cache_node *cn);
 typedef void (*cache_node_put_t)(struct cache *c, struct cache_node *cn);
 typedef unsigned int (*cache_node_resize_t)(const struct cache *c,
-					    unsigned int curr_size);
+					    unsigned int curr_size,
+					    int dir);
 
 static inline unsigned int cache_gradual_resize(const struct cache *cache,
-						unsigned int curr_size)
+						unsigned int curr_size,
+						int dir)
 {
-	return curr_size * 5 / 4;
+	if (dir < 0)
+		return curr_size * 9 / 10;
+	else if (dir > 0)
+		return curr_size * 5 / 4;
+	return curr_size;
 }
 
 struct cache_operations {
@@ -111,6 +120,7 @@ struct cache_node {
 
 struct cache {
 	int			c_flags;	/* behavioural flags */
+	unsigned int		c_orig_max;	/* original max cache nodes */
 	unsigned int		c_maxcount;	/* max cache nodes */
 	unsigned int		c_count;	/* count of nodes */
 	pthread_mutex_t		c_mutex;	/* node count mutex */
@@ -143,6 +153,7 @@ void cache_destroy(struct cache *cache);
 void cache_walk(struct cache *cache, cache_walk_t fn, void *data);
 void cache_purge(struct cache *);
 bool cache_flush(struct cache *cache);
+void cache_shrink(struct cache *cache);
 
 /* don't allocate a new node */
 #define CACHE_GET_INCORE	(1U << 0)
diff --git a/lib/support/cache.c b/lib/support/cache.c
index dbaddc1bd36d3d..7e1ddc3cc8788d 100644
--- a/lib/support/cache.c
+++ b/lib/support/cache.c
@@ -53,6 +53,7 @@ cache_init(
 	cache->c_hits = 0;
 	cache->c_misses = 0;
 	cache->c_maxcount = maxcount;
+	cache->c_orig_max = maxcount;
 	cache->hash = cache_operations->hash;
 	cache->alloc = cache_operations->alloc;
 	cache->flush = cache_operations->flush;
@@ -95,7 +96,7 @@ cache_expand(
 
 	pthread_mutex_lock(&cache->c_mutex);
 	if (cache->resize)
-		new_size = cache->resize(cache, cache->c_maxcount);
+		new_size = cache->resize(cache, cache->c_maxcount, 1);
 	if (new_size <= cache->c_maxcount)
 		new_size = cache->c_maxcount * 2;
 #ifdef CACHE_DEBUG
@@ -226,7 +227,8 @@ static unsigned int
 cache_shake(
 	struct cache *		cache,
 	unsigned int		priority,
-	bool			purge)
+	bool			purge,
+	unsigned int		nr_to_shake)
 {
 	struct cache_mru	*mru;
 	struct cache_hash	*hash;
@@ -274,7 +276,7 @@ cache_shake(
 		pthread_mutex_unlock(&node->cn_mutex);
 
 		count++;
-		if (!purge && count == CACHE_SHAKE_COUNT)
+		if (!purge && count == nr_to_shake)
 			break;
 	}
 	pthread_mutex_unlock(&mru->cm_mutex);
@@ -287,7 +289,7 @@ cache_shake(
 		pthread_mutex_unlock(&cache->c_mutex);
 	}
 
-	return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
+	return (count == nr_to_shake) ? priority : ++priority;
 }
 
 /*
@@ -477,7 +479,7 @@ cache_node_get(
 		node = cache_node_allocate(cache, key);
 		if (node)
 			break;
-		priority = cache_shake(cache, priority, false);
+		priority = cache_shake(cache, priority, false, CACHE_SHAKE_COUNT);
 		/*
 		 * We start at 0; if we free CACHE_SHAKE_COUNT we get
 		 * back the same priority, if not we get back priority+1.
@@ -507,12 +509,112 @@ cache_node_get(
 	return 1;
 }
 
+static unsigned int cache_mru_count(const struct cache *cache)
+{
+	const struct cache_mru	*mru = cache->c_mrus;
+	unsigned int		mru_count = 0;
+	unsigned int		i;
+
+	for (i = 0; i < CACHE_NR_PRIORITIES; i++, mru++)
+		mru_count += mru->cm_count;
+
+	return mru_count;
+}
+
+
+void cache_shrink(struct cache *cache)
+{
+	unsigned int		mru_count = 0;
+	unsigned int		threshold = 0;
+	unsigned int		priority = 0;
+	unsigned int		new_size;
+
+	pthread_mutex_lock(&cache->c_mutex);
+	/* Don't shrink below the original cache size */
+	if (cache->c_maxcount <= cache->c_orig_max)
+		goto out_unlock;
+
+	mru_count = cache_mru_count(cache);
+
+	/*
+	 * If there's not even a batch of nodes on the MRU to try to free,
+	 * don't bother with the rest.
+	 */
+	if (mru_count < CACHE_SHAKE_COUNT)
+		goto out_unlock;
+
+	/*
+	 * Figure out the next step down in size, but don't go below the
+	 * original size.
+	 */
+	if (cache->resize)
+		new_size = cache->resize(cache, cache->c_maxcount, -1);
+	else
+		new_size = cache->c_maxcount / 2;
+	if (new_size >= cache->c_maxcount)
+		goto out_unlock;
+	if (new_size < cache->c_orig_max)
+		new_size = cache->c_orig_max;
+
+	/*
+	 * If we can't purge enough nodes to get the node count below new_size,
+	 * don't resize the cache.
+	 */
+	if (cache->c_count - mru_count >= new_size)
+		goto out_unlock;
+
+#ifdef CACHE_DEBUG
+	fprintf(stderr, "decreasing cache max size from %u to %u (currently %u)\n",
+		cache->c_maxcount, new_size, cache->c_count);
+#endif
+	cache->c_maxcount = new_size;
+
+	/* Try to reduce the number of cached objects. */
+	do {
+		unsigned int new_priority;
+
+		/*
+		 * The threshold is the amount we need to purge to get c_count
+		 * below the new maxcount.  Try to free some objects off the
+		 * MRU.  Drop c_mutex because cache_shake will take it.
+		 */
+		threshold = cache->c_count - new_size;
+		pthread_mutex_unlock(&cache->c_mutex);
+
+		new_priority = cache_shake(cache, priority, false, threshold);
+
+		/* Either we made no progress or we ran out of MRU levels */
+		if (new_priority == priority ||
+		    new_priority > CACHE_MAX_PRIORITY)
+			return;
+		priority = new_priority;
+
+		pthread_mutex_lock(&cache->c_mutex);
+		/*
+		 * Someone could have walked in and changed the cache maxsize
+		 * again while we had the lock dropped.  If that happened, stop
+		 * clearing.
+		 */
+		if (cache->c_maxcount != new_size)
+			goto out_unlock;
+
+		mru_count = cache_mru_count(cache);
+		if (cache->c_count - mru_count >= new_size)
+			goto out_unlock;
+	} while (1);
+
+out_unlock:
+	pthread_mutex_unlock(&cache->c_mutex);
+	return;
+}
+
 void
 cache_node_put(
 	struct cache *		cache,
 	struct cache_node *	node)
 {
 	struct cache_mru *	mru;
+	bool was_put = false;
 
 	pthread_mutex_lock(&node->cn_mutex);
 #ifdef CACHE_DEBUG
@@ -528,6 +630,7 @@ cache_node_put(
 	}
 #endif
 	node->cn_count--;
+	was_put = (node->cn_count == 0);
 
 	if (node->cn_count == 0 && cache->put)
 		cache->put(cache, node);
@@ -541,6 +644,9 @@ cache_node_put(
 	}
 
 	pthread_mutex_unlock(&node->cn_mutex);
+
+	if (was_put && (cache->c_flags & CACHE_CAN_SHRINK))
+		cache_shrink(cache);
 }
 
 void
@@ -632,7 +738,7 @@ cache_purge(
 	int			i;
 
 	for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
-		cache_shake(cache, i, true);
+		cache_shake(cache, i, true, CACHE_SHAKE_COUNT);
 
 #ifdef CACHE_DEBUG
 	if (cache->c_count != 0) {


  parent reply	other threads:[~2025-08-21  1:12 UTC|newest]

Thread overview: 72+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-21  0:37 [RFC v4] fuse: use fs-iomap for better performance so we can containerize ext4 Darrick J. Wong
2025-08-21  0:49 ` [PATCHSET RFC v4 1/6] fuse4fs: fork a low level fuse server Darrick J. Wong
2025-08-21  1:08   ` [PATCH 01/20] fuse2fs: port fuse2fs to lowlevel libfuse API Darrick J. Wong
2025-08-21  1:08   ` [PATCH 02/20] fuse4fs: drop fuse 2.x support code Darrick J. Wong
2025-08-21  1:08   ` [PATCH 03/20] fuse4fs: namespace some helpers Darrick J. Wong
2025-08-21  1:08   ` [PATCH 04/20] fuse4fs: convert to low level API Darrick J. Wong
2025-08-21  1:09   ` [PATCH 05/20] libsupport: port the kernel list.h to libsupport Darrick J. Wong
2025-08-21  1:09   ` [PATCH 06/20] libsupport: add a cache Darrick J. Wong
2025-08-21  1:09   ` [PATCH 07/20] cache: disable debugging Darrick J. Wong
2025-08-21  1:09   ` [PATCH 08/20] cache: use modern list iterator macros Darrick J. Wong
2025-08-21  1:10   ` [PATCH 09/20] cache: embed struct cache in the owner Darrick J. Wong
2025-08-21  1:10   ` [PATCH 10/20] cache: pass cache pointer to callbacks Darrick J. Wong
2025-08-21  1:10   ` [PATCH 11/20] cache: pass a private data pointer through cache_walk Darrick J. Wong
2025-08-21  1:11   ` [PATCH 12/20] cache: add a helper to grab a new refcount for a cache_node Darrick J. Wong
2025-08-21  1:11   ` [PATCH 13/20] cache: return results of a cache flush Darrick J. Wong
2025-08-21  1:11   ` [PATCH 14/20] cache: add a "get only if incore" flag to cache_node_get Darrick J. Wong
2025-08-21  1:11   ` [PATCH 15/20] cache: support gradual expansion Darrick J. Wong
2025-08-21  1:12   ` Darrick J. Wong [this message]
2025-08-21  1:12   ` [PATCH 17/20] fuse4fs: add cache to track open files Darrick J. Wong
2025-08-21  1:12   ` [PATCH 18/20] fuse4fs: use the orphaned inode list Darrick J. Wong
2025-08-21  1:12   ` [PATCH 19/20] fuse4fs: implement FUSE_TMPFILE Darrick J. Wong
2025-08-21  1:13   ` [PATCH 20/20] fuse4fs: create incore reverse orphan list Darrick J. Wong
2025-08-21  0:49 ` [PATCHSET RFC v4 2/6] libext2fs: refactoring for fuse2fs iomap support Darrick J. Wong
2025-08-21  1:13   ` [PATCH 01/10] libext2fs: make it possible to extract the fd from an IO manager Darrick J. Wong
2025-08-21  1:13   ` [PATCH 02/10] libext2fs: always fsync the device when flushing the cache Darrick J. Wong
2025-08-21  1:13   ` [PATCH 03/10] libext2fs: always fsync the device when closing the unix IO manager Darrick J. Wong
2025-08-21  1:14   ` [PATCH 04/10] libext2fs: only fsync the unix fd if we wrote to the device Darrick J. Wong
2025-08-21  1:14   ` [PATCH 05/10] libext2fs: invalidate cached blocks when freeing them Darrick J. Wong
2025-08-21  1:14   ` [PATCH 06/10] libext2fs: only flush affected blocks in unix_write_byte Darrick J. Wong
2025-08-21  1:14   ` [PATCH 07/10] libext2fs: allow unix_write_byte when the write would be aligned Darrick J. Wong
2025-08-21  1:15   ` [PATCH 08/10] libext2fs: allow clients to ask to write full superblocks Darrick J. Wong
2025-08-21  1:15   ` [PATCH 09/10] libext2fs: allow callers to disallow I/O to file data blocks Darrick J. Wong
2025-08-21  1:15   ` [PATCH 10/10] libext2fs: add posix advisory locking to the unix IO manager Darrick J. Wong
2025-08-21  0:49 ` [PATCHSET RFC v4 3/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21  1:15   ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
2025-08-21  1:16   ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
2025-08-21  1:16   ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
2025-08-21  1:16   ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
2025-08-21  1:17   ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
2025-08-21  1:17   ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
2025-08-21  1:17   ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
2025-08-21  1:17   ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
2025-08-21  1:18   ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
2025-08-21  1:18   ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
2025-08-21  1:18   ` [PATCH 11/19] fuse2fs: avoid fuseblk mode if fuse-iomap support is likely Darrick J. Wong
2025-08-21  1:18   ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
2025-08-21  1:19   ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
2025-08-21  1:19   ` [PATCH 14/19] fuse2fs: add strictatime/lazytime mount options Darrick J. Wong
2025-08-21  1:19   ` [PATCH 15/19] fuse2fs: configure block device block size Darrick J. Wong
2025-08-21  1:19   ` [PATCH 16/19] fuse4fs: don't use inode number translation when possible Darrick J. Wong
2025-08-21  1:20   ` [PATCH 17/19] fuse4fs: separate invalidation Darrick J. Wong
2025-08-21  1:20   ` [PATCH 18/19] fuse2fs: implement statx Darrick J. Wong
2025-08-21  1:20   ` [PATCH 19/19] fuse2fs: enable atomic writes Darrick J. Wong
2025-08-21  0:50 ` [PATCHSET RFC v4 4/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21  1:20   ` [PATCH 1/2] fuse2fs: enable caching of iomaps Darrick J. Wong
2025-08-21  1:21   ` [PATCH 2/2] fuse2fs: be smarter about caching iomaps Darrick J. Wong
2025-08-21  0:50 ` [PATCHSET RFC v4 5/6] fuse2fs: handle timestamps and ACLs correctly when iomap is enabled Darrick J. Wong
2025-08-21  1:21   ` [PATCH 1/8] fuse2fs: skip permission checking on utimens " Darrick J. Wong
2025-08-21  1:21   ` [PATCH 2/8] fuse2fs: let the kernel tell us about acl/mode updates Darrick J. Wong
2025-08-21  1:21   ` [PATCH 3/8] fuse2fs: better debugging for file mode updates Darrick J. Wong
2025-08-21  1:22   ` [PATCH 4/8] fuse2fs: debug timestamp updates Darrick J. Wong
2025-08-21  1:22   ` [PATCH 5/8] fuse2fs: use coarse timestamps for iomap mode Darrick J. Wong
2025-08-21  1:22   ` [PATCH 6/8] fuse2fs: add tracing for retrieving timestamps Darrick J. Wong
2025-08-21  1:23   ` [PATCH 7/8] fuse2fs: enable syncfs Darrick J. Wong
2025-08-21  1:23   ` [PATCH 8/8] fuse2fs: skip the gdt write in op_destroy if syncfs is working Darrick J. Wong
2025-08-21  0:50 ` [PATCHSET RFC v4 6/6] fuse2fs: improve block and inode caching Darrick J. Wong
2025-08-21  1:23   ` [PATCH 1/6] libsupport: add caching IO manager Darrick J. Wong
2025-08-21  1:23   ` [PATCH 2/6] iocache: add the actual buffer cache Darrick J. Wong
2025-08-21  1:24   ` [PATCH 3/6] iocache: bump buffer mru priority every 50 accesses Darrick J. Wong
2025-08-21  1:24   ` [PATCH 4/6] fuse2fs: enable caching IO manager Darrick J. Wong
2025-08-21  1:24   ` [PATCH 5/6] fuse2fs: increase inode cache size Darrick J. Wong
2025-08-21  1:24   ` [PATCH 6/6] libext2fs: improve caching for inodes Darrick J. Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=175573713095.20753.14080863399069888459.stgit@frogsfrogsfrogs \
    --to=djwong@kernel.org \
    --cc=John@groves.net \
    --cc=amir73il@gmail.com \
    --cc=bernd@bsbernd.com \
    --cc=joannelkoong@gmail.com \
    --cc=linux-ext4@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=neal@gompa.dev \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox