From: "Darrick J. Wong" <djwong@kernel.org>
To: tytso@mit.edu
Cc: linux-ext4@vger.kernel.org
Subject: [PATCH 19/23] cache: implement automatic shrinking
Date: Thu, 06 Nov 2025 14:48:07 -0800 [thread overview]
Message-ID: <176246795908.2864310.4023008384897404874.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <176246795459.2864310.10641701647593035148.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Shrink the cache whenever all of the following are true: maxcount has
been expanded beyond its initial value, a cached object is released to
one of the MRU lists, and the number of objects sitting on the MRU lists
is enough to drop the cache count down a level. This enables a cache to
reduce its memory consumption after a spike in which reclamation wasn't
possible.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
lib/support/cache.h | 20 +++++++--
lib/support/cache.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 129 insertions(+), 10 deletions(-)
diff --git a/lib/support/cache.h b/lib/support/cache.h
index 32b99b5fe733e3..c7c8298c115d50 100644
--- a/lib/support/cache.h
+++ b/lib/support/cache.h
@@ -16,7 +16,11 @@
*/
#define CACHE_MISCOMPARE_PURGE (1 << 0)
-#define CACHE_FLAGS_ALL (CACHE_MISCOMPARE_PURGE)
+/* Automatically shrink the cache's max_count when possible. */
+#define CACHE_AUTO_SHRINK (1 << 1)
+
+#define CACHE_FLAGS_ALL (CACHE_MISCOMPARE_PURGE | \
+ CACHE_AUTO_SHRINK)
/*
* cache object campare return values
@@ -69,12 +73,18 @@ typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *);
typedef int (*cache_node_get_t)(struct cache *c, struct cache_node *cn);
typedef void (*cache_node_put_t)(struct cache *c, struct cache_node *cn);
typedef unsigned int (*cache_node_resize_t)(const struct cache *c,
- unsigned int curr_size);
+ unsigned int curr_size,
+ int dir);
static inline unsigned int cache_gradual_resize(const struct cache *cache,
- unsigned int curr_size)
+ unsigned int curr_size,
+ int dir)
{
- return curr_size * 5 / 4;
+ if (dir < 0)
+ return curr_size * 9 / 10;
+ else if (dir > 0)
+ return curr_size * 5 / 4;
+ return curr_size;
}
struct cache_operations {
@@ -113,6 +123,7 @@ struct cache_node {
struct cache {
int c_flags; /* behavioural flags */
+ unsigned int c_orig_max; /* original max cache nodes */
unsigned int c_maxcount; /* max cache nodes */
unsigned int c_count; /* count of nodes */
pthread_mutex_t c_mutex; /* node count mutex */
@@ -145,6 +156,7 @@ void cache_destroy(struct cache *cache);
void cache_walk(struct cache *cache, cache_walk_t fn, void *data);
void cache_purge(struct cache *);
bool cache_flush(struct cache *cache);
+void cache_shrink(struct cache *cache);
void cache_set_maxcount(struct cache *cache, unsigned int maxcount);
int cache_set_flag(struct cache *cache, int flags);
diff --git a/lib/support/cache.c b/lib/support/cache.c
index 99044248b85d38..3a9e276f11af72 100644
--- a/lib/support/cache.c
+++ b/lib/support/cache.c
@@ -53,6 +53,7 @@ cache_init(
cache->c_hits = 0;
cache->c_misses = 0;
cache->c_maxcount = maxcount;
+ cache->c_orig_max = maxcount;
cache->hash = cache_operations->hash;
cache->alloc = cache_operations->alloc;
cache->flush = cache_operations->flush;
@@ -93,6 +94,7 @@ cache_set_maxcount(
unsigned int maxcount)
{
pthread_mutex_lock(&cache->c_mutex);
+ cache->c_orig_max = maxcount;
cache->c_maxcount = maxcount;
pthread_mutex_unlock(&cache->c_mutex);
}
@@ -123,7 +125,7 @@ cache_expand(
pthread_mutex_lock(&cache->c_mutex);
if (cache->resize)
- new_size = cache->resize(cache, cache->c_maxcount);
+ new_size = cache->resize(cache, cache->c_maxcount, 1);
if (new_size <= cache->c_maxcount)
new_size = cache->c_maxcount * 2;
#ifdef CACHE_DEBUG
@@ -254,7 +256,8 @@ static unsigned int
cache_shake(
struct cache * cache,
unsigned int priority,
- bool purge)
+ bool purge,
+ unsigned int nr_to_shake)
{
struct cache_mru *mru;
struct cache_hash *hash;
@@ -302,7 +305,7 @@ cache_shake(
pthread_mutex_unlock(&node->cn_mutex);
count++;
- if (!purge && count == CACHE_SHAKE_COUNT)
+ if (!purge && count == nr_to_shake)
break;
}
pthread_mutex_unlock(&mru->cm_mutex);
@@ -315,7 +318,7 @@ cache_shake(
pthread_mutex_unlock(&cache->c_mutex);
}
- return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
+ return (count == nr_to_shake) ? priority : ++priority;
}
/*
@@ -505,7 +508,7 @@ cache_node_get(
node = cache_node_allocate(cache, key);
if (node)
break;
- priority = cache_shake(cache, priority, false);
+ priority = cache_shake(cache, priority, false, CACHE_SHAKE_COUNT);
/*
* We start at 0; if we free CACHE_SHAKE_COUNT we get
* back the same priority, if not we get back priority+1.
@@ -535,12 +538,112 @@ cache_node_get(
return 1;
}
+static unsigned int cache_mru_count(const struct cache *cache)
+{
+	unsigned int total = 0;
+	unsigned int prio;
+
+	/* Sum cm_count over all CACHE_NR_PRIORITIES entries of c_mrus. */
+	for (prio = 0; prio < CACHE_NR_PRIORITIES; prio++)
+		total += cache->c_mrus[prio].cm_count;
+
+	return total;
+}
+
+
+/*
+ * Lower c_maxcount one step towards c_orig_max and free nodes from the MRU
+ * lists until c_count fits the new limit.  Does nothing unless the cache was
+ * expanded and enough nodes sit on the MRU lists to reach the smaller size.
+ */
+void cache_shrink(struct cache *cache)
+{
+	unsigned int mru_count;
+	unsigned int priority = 0;
+	unsigned int new_size;
+
+	pthread_mutex_lock(&cache->c_mutex);
+	/* Don't shrink below the original cache size */
+	if (cache->c_maxcount <= cache->c_orig_max)
+		goto out_unlock;
+
+	/* Without a full batch of MRU nodes to try to free, don't bother. */
+	mru_count = cache_mru_count(cache);
+	if (mru_count < CACHE_SHAKE_COUNT)
+		goto out_unlock;
+
+	/* Pick the next step down in size, clamped to the original size. */
+	if (cache->resize)
+		new_size = cache->resize(cache, cache->c_maxcount, -1);
+	else
+		new_size = cache->c_maxcount / 2;
+	if (new_size >= cache->c_maxcount)
+		goto out_unlock;
+	if (new_size < cache->c_orig_max)
+		new_size = cache->c_orig_max;
+
+	/*
+	 * If we can't purge enough nodes to get the node count below new_size,
+	 * don't resize the cache.
+	 */
+	if (cache->c_count - mru_count >= new_size)
+		goto out_unlock;
+
+#ifdef CACHE_DEBUG
+	fprintf(stderr, "decreasing cache max size from %u to %u (currently %u)\n",
+		cache->c_maxcount, new_size, cache->c_count);
+#endif
+	cache->c_maxcount = new_size;
+
+	/*
+	 * Free cached objects only while c_count exceeds the new limit.  This
+	 * keeps the unsigned threshold below from wrapping around or being
+	 * zero; cache_shake would then never hit its early-stop count.
+	 */
+	while (cache->c_count > new_size) {
+		/* How many nodes must go to bring c_count down to new_size. */
+		unsigned int threshold = cache->c_count - new_size;
+		unsigned int new_priority;
+
+		/* Drop c_mutex because cache_shake will take it. */
+		pthread_mutex_unlock(&cache->c_mutex);
+		new_priority = cache_shake(cache, priority, false, threshold);
+
+		/*
+		 * cache_shake returns the same priority once it has shaken
+		 * the requested number of nodes; otherwise it moves up one
+		 * level.  Stop when done or when we run out of MRU levels.
+		 */
+		if (new_priority == priority ||
+		    new_priority > CACHE_MAX_PRIORITY)
+			return;
+		priority = new_priority;
+
+		pthread_mutex_lock(&cache->c_mutex);
+		/*
+		 * Someone could have walked in and changed the cache maxsize
+		 * again while we had the lock dropped.  If that happened, stop
+		 * clearing.
+		 */
+		if (cache->c_maxcount != new_size)
+			goto out_unlock;
+
+		mru_count = cache_mru_count(cache);
+		if (cache->c_count - mru_count >= new_size)
+			goto out_unlock;
+	}
+
+out_unlock:
+	pthread_mutex_unlock(&cache->c_mutex);
+}
+
void
cache_node_put(
struct cache * cache,
struct cache_node * node)
{
struct cache_mru * mru;
+ bool was_put = false;
pthread_mutex_lock(&node->cn_mutex);
#ifdef CACHE_DEBUG
@@ -556,6 +659,7 @@ cache_node_put(
}
#endif
node->cn_count--;
+ was_put = (node->cn_count == 0);
if (node->cn_count == 0 && cache->put)
cache->put(cache, node);
@@ -569,6 +673,9 @@ cache_node_put(
}
pthread_mutex_unlock(&node->cn_mutex);
+
+ if (was_put && (cache->c_flags & CACHE_AUTO_SHRINK))
+ cache_shrink(cache);
}
void
@@ -660,7 +767,7 @@ cache_purge(
int i;
for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
- cache_shake(cache, i, true);
+ cache_shake(cache, i, true, CACHE_SHAKE_COUNT);
#ifdef CACHE_DEBUG
if (cache->c_count != 0) {
next prev parent reply other threads:[~2025-11-06 22:48 UTC|newest]
Thread overview: 84+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-11-06 22:14 [PATCHBOMB 1.48] fuse2fs: new features, new server Darrick J. Wong
2025-11-06 22:27 ` [PATCHSET 1/9] fuse2fs: fix locking problems Darrick J. Wong
2025-11-06 22:30 ` [PATCH 1/4] libext2fs: add POSIX advisory locking to the unix IO manager Darrick J. Wong
2025-11-06 22:30 ` [PATCH 2/4] fuse2fs: try to lock filesystem image files before using them Darrick J. Wong
2025-11-06 22:30 ` [PATCH 3/4] fuse2fs: quiet down write-protect warning Darrick J. Wong
2025-11-06 22:31 ` [PATCH 4/4] fuse2fs: try to grab block device O_EXCL repeatedly Darrick J. Wong
2025-11-06 22:28 ` [PATCHSET 2/9] fuse2fs: add some easy new features Darrick J. Wong
2025-11-06 22:31 ` [PATCH 01/19] libext2fs: initialize htree when expanding directory Darrick J. Wong
2025-11-06 22:31 ` [PATCH 02/19] libext2fs: create link count adjustment helpers for dir_nlink Darrick J. Wong
2025-11-06 22:31 ` [PATCH 03/19] libext2fs: fix ext2fs_mmp_update Darrick J. Wong
2025-11-06 22:32 ` [PATCH 04/19] libext2fs: refactor aligned MMP buffer allocation Darrick J. Wong
2025-11-06 22:32 ` [PATCH 05/19] libext2fs: always use ext2fs_mmp_get_mem to allocate fs->mmp_buf Darrick J. Wong
2025-11-06 22:32 ` [PATCH 06/19] fuse2fs: check root directory while mounting Darrick J. Wong
2025-11-06 22:32 ` [PATCH 07/19] fuse2fs: read bitmaps asynchronously during initialization Darrick J. Wong
2025-11-06 22:33 ` [PATCH 08/19] fuse2fs: use file handles when possible Darrick J. Wong
2025-11-06 22:33 ` [PATCH 09/19] fuse2fs: implement dir seeking Darrick J. Wong
2025-11-06 22:33 ` [PATCH 10/19] fuse2fs: implement readdirplus Darrick J. Wong
2025-11-06 22:34 ` [PATCH 11/19] fuse2fs: implement dirsync mode Darrick J. Wong
2025-11-06 22:34 ` [PATCH 12/19] fuse2fs: only flush O_SYNC files on close Darrick J. Wong
2025-11-06 22:34 ` [PATCH 13/19] fuse2fs: improve want_extra_isize handling Darrick J. Wong
2025-11-06 22:34 ` [PATCH 14/19] fuse2fs: cache symlink targets in the kernel Darrick J. Wong
2025-11-06 22:35 ` [PATCH 15/19] fuse2fs: constrain worker thread count Darrick J. Wong
2025-11-06 22:35 ` [PATCH 16/19] fuse2fs: improve error handling behaviors Darrick J. Wong
2025-11-06 22:35 ` [PATCH 17/19] fuse2fs: fix link count overflows on dir_nlink filesystems Darrick J. Wong
2025-11-06 22:35 ` [PATCH 18/19] libsupport: add background thread manager Darrick J. Wong
2025-11-06 22:36 ` [PATCH 19/19] fuse2fs: implement MMP updates Darrick J. Wong
2025-11-06 22:28 ` [PATCHSET 3/9] fuse2fs: clean up operation startup Darrick J. Wong
2025-11-06 22:36 ` [PATCH 1/9] fuse2fs: rework FUSE2FS_CHECK_CONTEXT not to rely on global_fs Darrick J. Wong
2025-11-06 22:36 ` [PATCH 2/9] fuse2fs: rework checking file handles Darrick J. Wong
2025-11-06 22:36 ` [PATCH 3/9] fuse2fs: rework fallocate file handle extraction Darrick J. Wong
2025-11-06 22:37 ` [PATCH 4/9] fuse2fs: consolidate file handle checking in op_ioctl Darrick J. Wong
2025-11-06 22:37 ` [PATCH 5/9] fuse2fs: move fs assignment closer to locking the bfl Darrick J. Wong
2025-11-06 22:37 ` [PATCH 6/9] fuse2fs: clean up operation startup Darrick J. Wong
2025-11-06 22:37 ` [PATCH 7/9] fuse2fs: clean up operation completion Darrick J. Wong
2025-11-06 22:38 ` [PATCH 8/9] fuse2fs: clean up more boilerplate Darrick J. Wong
2025-11-06 22:38 ` [PATCH 9/9] fuse2fs: collect runtime of various operations Darrick J. Wong
2025-11-06 22:28 ` [PATCHSET 4/9] fuse2fs: refactor unmount code Darrick J. Wong
2025-11-06 22:38 ` [PATCH 1/3] fuse2fs: get rid of the global_fs variable Darrick J. Wong
2025-11-06 22:39 ` [PATCH 2/3] fuse2fs: hoist lockfile code Darrick J. Wong
2025-11-06 22:39 ` [PATCH 3/3] fuse2fs: hoist unmount code from main Darrick J. Wong
2025-11-06 22:28 ` [PATCHSET 5/9] fuse2fs: refactor mount code Darrick J. Wong
2025-11-06 22:39 ` [PATCH 1/3] fuse2fs: split filesystem mounting into helper functions Darrick J. Wong
2025-11-06 22:39 ` [PATCH 2/3] fuse2fs: register as an IO flusher thread Darrick J. Wong
2025-11-06 22:40 ` [PATCH 3/3] fuse2fs: adjust OOM killer score if possible Darrick J. Wong
2025-11-06 22:29 ` [PATCHSET 6/9] fuse2fs: improve operation tracing Darrick J. Wong
2025-11-06 22:40 ` [PATCH 1/4] fuse2fs: hook library error message printing Darrick J. Wong
2025-11-06 22:40 ` [PATCH 2/4] fuse2fs: print the function name in error messages, not the file name Darrick J. Wong
2025-11-06 22:40 ` [PATCH 3/4] fuse2fs: improve tracing for file range operations Darrick J. Wong
2025-11-06 22:41 ` [PATCH 4/4] fuse2fs: record thread id in debug trace data Darrick J. Wong
2025-11-06 22:29 ` [PATCHSET 7/9] fuse2fs: better tracking of writable state Darrick J. Wong
2025-11-06 22:41 ` [PATCH 1/3] fuse2fs: pass a struct fuse2fs to fs_writeable Darrick J. Wong
2025-11-06 22:41 ` [PATCH 2/3] fuse2fs: track our own writable state Darrick J. Wong
2025-11-06 22:41 ` [PATCH 3/3] fuse2fs: enable the shutdown ioctl Darrick J. Wong
2025-11-06 22:29 ` [PATCHSET 8/9] fuse2fs: upgrade to libfuse 3.17 Darrick J. Wong
2025-11-06 22:42 ` [PATCH 1/4] fuse2fs: bump library version Darrick J. Wong
2025-11-06 22:42 ` [PATCH 2/4] fuse2fs: wrap the fuse_set_feature_flag helper for older libfuse Darrick J. Wong
2025-11-06 22:42 ` [PATCH 3/4] fuse2fs: disable nfs exports Darrick J. Wong
2025-11-06 22:43 ` [PATCH 4/4] fuse2fs: drop fuse 2.x support code Darrick J. Wong
2025-11-06 22:30 ` [PATCHSET 9/9] fuse4fs: fork a low level fuse server Darrick J. Wong
2025-11-06 22:43 ` [PATCH 01/23] fuse2fs: separate libfuse3 and fuse2fs detection in configure Darrick J. Wong
2025-11-06 22:43 ` [PATCH 02/23] fuse2fs: start porting fuse2fs to lowlevel libfuse API Darrick J. Wong
2025-11-06 22:43 ` [PATCH 03/23] debian: create new package for fuse4fs Darrick J. Wong
2025-11-06 22:44 ` [PATCH 04/23] fuse4fs: namespace some helpers Darrick J. Wong
2025-11-07 8:09 ` Amir Goldstein
2025-11-08 0:25 ` Darrick J. Wong
2025-11-06 22:44 ` [PATCH 05/23] fuse4fs: convert to low level API Darrick J. Wong
2025-11-06 22:44 ` [PATCH 06/23] libsupport: port the kernel list.h to libsupport Darrick J. Wong
2025-11-06 22:44 ` [PATCH 07/23] libsupport: add a cache Darrick J. Wong
2025-11-06 22:45 ` [PATCH 08/23] cache: disable debugging Darrick J. Wong
2025-11-06 22:45 ` [PATCH 09/23] cache: use modern list iterator macros Darrick J. Wong
2025-11-06 22:45 ` [PATCH 10/23] cache: embed struct cache in the owner Darrick J. Wong
2025-11-06 22:45 ` [PATCH 11/23] cache: pass cache pointer to callbacks Darrick J. Wong
2025-11-06 22:46 ` [PATCH 12/23] cache: pass a private data pointer through cache_walk Darrick J. Wong
2025-11-06 22:46 ` [PATCH 13/23] cache: add a helper to grab a new refcount for a cache_node Darrick J. Wong
2025-11-06 22:46 ` [PATCH 14/23] cache: return results of a cache flush Darrick J. Wong
2025-11-06 22:47 ` [PATCH 15/23] cache: add a "get only if incore" flag to cache_node_get Darrick J. Wong
2025-11-06 22:47 ` [PATCH 16/23] cache: support gradual expansion Darrick J. Wong
2025-11-06 22:47 ` [PATCH 17/23] cache: support updating maxcount and flags Darrick J. Wong
2025-11-06 22:47 ` [PATCH 18/23] cache: support channging flags Darrick J. Wong
2025-11-06 22:48 ` Darrick J. Wong [this message]
2025-11-06 22:48 ` [PATCH 20/23] fuse4fs: add cache to track open files Darrick J. Wong
2025-11-06 22:48 ` [PATCH 21/23] fuse4fs: use the orphaned inode list Darrick J. Wong
2025-11-06 22:48 ` [PATCH 22/23] fuse4fs: implement FUSE_TMPFILE Darrick J. Wong
2025-11-06 22:49 ` [PATCH 23/23] fuse4fs: create incore reverse orphan list Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=176246795908.2864310.4023008384897404874.stgit@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox