From: "Darrick J. Wong" <djwong@kernel.org>
To: tytso@mit.edu
Cc: John@groves.net, bernd@bsbernd.com,
linux-fsdevel@vger.kernel.org, linux-ext4@vger.kernel.org,
miklos@szeredi.hu, amir73il@gmail.com, joannelkoong@gmail.com,
neal@gompa.dev
Subject: [PATCH 16/20] cache: implement automatic shrinking
Date: Wed, 20 Aug 2025 18:12:02 -0700 [thread overview]
Message-ID: <175573713095.20753.14080863399069888459.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <175573712721.20753.5223489399594191991.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Shrink the cache whenever maxcount has been expanded beyond its initial
value, a cached object is released to one of the mru lists, and the
number of objects sitting on the mru is enough to drop the cache count
down a level. This enables a cache to reduce its memory consumption
after a spike in which reclamation wasn't possible.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
---
lib/support/cache.h | 17 ++++++-
lib/support/cache.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 126 insertions(+), 9 deletions(-)
diff --git a/lib/support/cache.h b/lib/support/cache.h
index ae37945c545f46..cd738b6cd3a460 100644
--- a/lib/support/cache.h
+++ b/lib/support/cache.h
@@ -16,6 +16,9 @@
*/
#define CACHE_MISCOMPARE_PURGE (1 << 0)
+/* Automatically shrink the cache's max_count when possible. */
+#define CACHE_CAN_SHRINK (1U << 1)
+
/*
* cache object campare return values
*/
@@ -67,12 +70,18 @@ typedef unsigned int (*cache_bulk_relse_t)(struct cache *, struct list_head *);
typedef int (*cache_node_get_t)(struct cache *c, struct cache_node *cn);
typedef void (*cache_node_put_t)(struct cache *c, struct cache_node *cn);
typedef unsigned int (*cache_node_resize_t)(const struct cache *c,
- unsigned int curr_size);
+ unsigned int curr_size,
+ int dir);
static inline unsigned int cache_gradual_resize(const struct cache *cache,
- unsigned int curr_size)
+ unsigned int curr_size,
+ int dir)
{
- return curr_size * 5 / 4;
+ if (dir < 0)
+ return curr_size * 9 / 10;
+ else if (dir > 0)
+ return curr_size * 5 / 4;
+ return curr_size;
}
struct cache_operations {
@@ -111,6 +120,7 @@ struct cache_node {
struct cache {
int c_flags; /* behavioural flags */
+ unsigned int c_orig_max; /* original max cache nodes */
unsigned int c_maxcount; /* max cache nodes */
unsigned int c_count; /* count of nodes */
pthread_mutex_t c_mutex; /* node count mutex */
@@ -143,6 +153,7 @@ void cache_destroy(struct cache *cache);
void cache_walk(struct cache *cache, cache_walk_t fn, void *data);
void cache_purge(struct cache *);
bool cache_flush(struct cache *cache);
+void cache_shrink(struct cache *cache);
/* don't allocate a new node */
#define CACHE_GET_INCORE (1U << 0)
diff --git a/lib/support/cache.c b/lib/support/cache.c
index dbaddc1bd36d3d..7e1ddc3cc8788d 100644
--- a/lib/support/cache.c
+++ b/lib/support/cache.c
@@ -53,6 +53,7 @@ cache_init(
cache->c_hits = 0;
cache->c_misses = 0;
cache->c_maxcount = maxcount;
+ cache->c_orig_max = maxcount;
cache->hash = cache_operations->hash;
cache->alloc = cache_operations->alloc;
cache->flush = cache_operations->flush;
@@ -95,7 +96,7 @@ cache_expand(
pthread_mutex_lock(&cache->c_mutex);
if (cache->resize)
- new_size = cache->resize(cache, cache->c_maxcount);
+ new_size = cache->resize(cache, cache->c_maxcount, 1);
if (new_size <= cache->c_maxcount)
new_size = cache->c_maxcount * 2;
#ifdef CACHE_DEBUG
@@ -226,7 +227,8 @@ static unsigned int
cache_shake(
struct cache * cache,
unsigned int priority,
- bool purge)
+ bool purge,
+ unsigned int nr_to_shake)
{
struct cache_mru *mru;
struct cache_hash *hash;
@@ -274,7 +276,7 @@ cache_shake(
pthread_mutex_unlock(&node->cn_mutex);
count++;
- if (!purge && count == CACHE_SHAKE_COUNT)
+ if (!purge && count == nr_to_shake)
break;
}
pthread_mutex_unlock(&mru->cm_mutex);
@@ -287,7 +289,7 @@ cache_shake(
pthread_mutex_unlock(&cache->c_mutex);
}
- return (count == CACHE_SHAKE_COUNT) ? priority : ++priority;
+ return (count == nr_to_shake) ? priority : ++priority;
}
/*
@@ -477,7 +479,7 @@ cache_node_get(
node = cache_node_allocate(cache, key);
if (node)
break;
- priority = cache_shake(cache, priority, false);
+ priority = cache_shake(cache, priority, false, CACHE_SHAKE_COUNT);
/*
* We start at 0; if we free CACHE_SHAKE_COUNT we get
* back the same priority, if not we get back priority+1.
@@ -507,12 +509,112 @@ cache_node_get(
return 1;
}
+static unsigned int cache_mru_count(const struct cache *cache)
+{
+ const struct cache_mru *mru = cache->c_mrus;
+ unsigned int mru_count = 0;
+ unsigned int i;
+
+ for (i = 0; i < CACHE_NR_PRIORITIES; i++, mru++)
+ mru_count += mru->cm_count;
+
+ return mru_count;
+}
+
+
+void cache_shrink(struct cache *cache)
+{
+ unsigned int mru_count = 0;
+ unsigned int threshold = 0;
+ unsigned int priority = 0;
+ unsigned int new_size;
+
+ pthread_mutex_lock(&cache->c_mutex);
+ /* Don't shrink below the original cache size */
+ if (cache->c_maxcount <= cache->c_orig_max)
+ goto out_unlock;
+
+ mru_count = cache_mru_count(cache);
+
+ /*
+ * If there's not even a batch of nodes on the MRU to try to free,
+ * don't bother with the rest.
+ */
+ if (mru_count < CACHE_SHAKE_COUNT)
+ goto out_unlock;
+
+ /*
+ * Figure out the next step down in size, but don't go below the
+ * original size.
+ */
+ if (cache->resize)
+ new_size = cache->resize(cache, cache->c_maxcount, -1);
+ else
+ new_size = cache->c_maxcount / 2;
+ if (new_size >= cache->c_maxcount)
+ goto out_unlock;
+ if (new_size < cache->c_orig_max)
+ new_size = cache->c_orig_max;
+
+ /*
+ * If we can't purge enough nodes to get the node count below new_size,
+ * don't resize the cache.
+ */
+ if (cache->c_count - mru_count >= new_size)
+ goto out_unlock;
+
+#ifdef CACHE_DEBUG
+ fprintf(stderr, "decreasing cache max size from %u to %u (currently %u)\n",
+ cache->c_maxcount, new_size, cache->c_count);
+#endif
+ cache->c_maxcount = new_size;
+
+ /* Try to reduce the number of cached objects. */
+ do {
+ unsigned int new_priority;
+
+ /*
+ * The threshold is the amount we need to purge to get c_count
+ * below the new maxcount. Try to free some objects off the
+ * MRU. Drop c_mutex because cache_shake will take it.
+ */
+ threshold = cache->c_count - new_size;
+ pthread_mutex_unlock(&cache->c_mutex);
+
+ new_priority = cache_shake(cache, priority, false, threshold);
+
+ /* Either we made no progress or we ran out of MRU levels */
+ if (new_priority == priority ||
+ new_priority > CACHE_MAX_PRIORITY)
+ return;
+ priority = new_priority;
+
+ pthread_mutex_lock(&cache->c_mutex);
+ /*
+ * Someone could have walked in and changed the cache maxsize
+ * again while we had the lock dropped. If that happened, stop
+ * clearing.
+ */
+ if (cache->c_maxcount != new_size)
+ goto out_unlock;
+
+ mru_count = cache_mru_count(cache);
+ if (cache->c_count - mru_count >= new_size)
+ goto out_unlock;
+ } while (1);
+
+out_unlock:
+ pthread_mutex_unlock(&cache->c_mutex);
+ return;
+}
+
void
cache_node_put(
struct cache * cache,
struct cache_node * node)
{
struct cache_mru * mru;
+ bool was_put = false;
pthread_mutex_lock(&node->cn_mutex);
#ifdef CACHE_DEBUG
@@ -528,6 +630,7 @@ cache_node_put(
}
#endif
node->cn_count--;
+ was_put = (node->cn_count == 0);
if (node->cn_count == 0 && cache->put)
cache->put(cache, node);
@@ -541,6 +644,9 @@ cache_node_put(
}
pthread_mutex_unlock(&node->cn_mutex);
+
+ if (was_put && (cache->c_flags & CACHE_CAN_SHRINK))
+ cache_shrink(cache);
}
void
@@ -632,7 +738,7 @@ cache_purge(
int i;
for (i = 0; i <= CACHE_DIRTY_PRIORITY; i++)
- cache_shake(cache, i, true);
+ cache_shake(cache, i, true, CACHE_SHAKE_COUNT);
#ifdef CACHE_DEBUG
if (cache->c_count != 0) {
next prev parent reply other threads:[~2025-08-21 1:12 UTC|newest]
Thread overview: 72+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-21 0:37 [RFC v4] fuse: use fs-iomap for better performance so we can containerize ext4 Darrick J. Wong
2025-08-21 0:49 ` [PATCHSET RFC v4 1/6] fuse4fs: fork a low level fuse server Darrick J. Wong
2025-08-21 1:08 ` [PATCH 01/20] fuse2fs: port fuse2fs to lowlevel libfuse API Darrick J. Wong
2025-08-21 1:08 ` [PATCH 02/20] fuse4fs: drop fuse 2.x support code Darrick J. Wong
2025-08-21 1:08 ` [PATCH 03/20] fuse4fs: namespace some helpers Darrick J. Wong
2025-08-21 1:08 ` [PATCH 04/20] fuse4fs: convert to low level API Darrick J. Wong
2025-08-21 1:09 ` [PATCH 05/20] libsupport: port the kernel list.h to libsupport Darrick J. Wong
2025-08-21 1:09 ` [PATCH 06/20] libsupport: add a cache Darrick J. Wong
2025-08-21 1:09 ` [PATCH 07/20] cache: disable debugging Darrick J. Wong
2025-08-21 1:09 ` [PATCH 08/20] cache: use modern list iterator macros Darrick J. Wong
2025-08-21 1:10 ` [PATCH 09/20] cache: embed struct cache in the owner Darrick J. Wong
2025-08-21 1:10 ` [PATCH 10/20] cache: pass cache pointer to callbacks Darrick J. Wong
2025-08-21 1:10 ` [PATCH 11/20] cache: pass a private data pointer through cache_walk Darrick J. Wong
2025-08-21 1:11 ` [PATCH 12/20] cache: add a helper to grab a new refcount for a cache_node Darrick J. Wong
2025-08-21 1:11 ` [PATCH 13/20] cache: return results of a cache flush Darrick J. Wong
2025-08-21 1:11 ` [PATCH 14/20] cache: add a "get only if incore" flag to cache_node_get Darrick J. Wong
2025-08-21 1:11 ` [PATCH 15/20] cache: support gradual expansion Darrick J. Wong
2025-08-21 1:12 ` Darrick J. Wong [this message]
2025-08-21 1:12 ` [PATCH 17/20] fuse4fs: add cache to track open files Darrick J. Wong
2025-08-21 1:12 ` [PATCH 18/20] fuse4fs: use the orphaned inode list Darrick J. Wong
2025-08-21 1:12 ` [PATCH 19/20] fuse4fs: implement FUSE_TMPFILE Darrick J. Wong
2025-08-21 1:13 ` [PATCH 20/20] fuse4fs: create incore reverse orphan list Darrick J. Wong
2025-08-21 0:49 ` [PATCHSET RFC v4 2/6] libext2fs: refactoring for fuse2fs iomap support Darrick J. Wong
2025-08-21 1:13 ` [PATCH 01/10] libext2fs: make it possible to extract the fd from an IO manager Darrick J. Wong
2025-08-21 1:13 ` [PATCH 02/10] libext2fs: always fsync the device when flushing the cache Darrick J. Wong
2025-08-21 1:13 ` [PATCH 03/10] libext2fs: always fsync the device when closing the unix IO manager Darrick J. Wong
2025-08-21 1:14 ` [PATCH 04/10] libext2fs: only fsync the unix fd if we wrote to the device Darrick J. Wong
2025-08-21 1:14 ` [PATCH 05/10] libext2fs: invalidate cached blocks when freeing them Darrick J. Wong
2025-08-21 1:14 ` [PATCH 06/10] libext2fs: only flush affected blocks in unix_write_byte Darrick J. Wong
2025-08-21 1:14 ` [PATCH 07/10] libext2fs: allow unix_write_byte when the write would be aligned Darrick J. Wong
2025-08-21 1:15 ` [PATCH 08/10] libext2fs: allow clients to ask to write full superblocks Darrick J. Wong
2025-08-21 1:15 ` [PATCH 09/10] libext2fs: allow callers to disallow I/O to file data blocks Darrick J. Wong
2025-08-21 1:15 ` [PATCH 10/10] libext2fs: add posix advisory locking to the unix IO manager Darrick J. Wong
2025-08-21 0:49 ` [PATCHSET RFC v4 3/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21 1:15 ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
2025-08-21 1:16 ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
2025-08-21 1:16 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
2025-08-21 1:16 ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
2025-08-21 1:17 ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
2025-08-21 1:17 ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
2025-08-21 1:17 ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
2025-08-21 1:17 ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
2025-08-21 1:18 ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
2025-08-21 1:18 ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
2025-08-21 1:18 ` [PATCH 11/19] fuse2fs: avoid fuseblk mode if fuse-iomap support is likely Darrick J. Wong
2025-08-21 1:18 ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
2025-08-21 1:19 ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
2025-08-21 1:19 ` [PATCH 14/19] fuse2fs: add strictatime/lazytime mount options Darrick J. Wong
2025-08-21 1:19 ` [PATCH 15/19] fuse2fs: configure block device block size Darrick J. Wong
2025-08-21 1:19 ` [PATCH 16/19] fuse4fs: don't use inode number translation when possible Darrick J. Wong
2025-08-21 1:20 ` [PATCH 17/19] fuse4fs: separate invalidation Darrick J. Wong
2025-08-21 1:20 ` [PATCH 18/19] fuse2fs: implement statx Darrick J. Wong
2025-08-21 1:20 ` [PATCH 19/19] fuse2fs: enable atomic writes Darrick J. Wong
2025-08-21 0:50 ` [PATCHSET RFC v4 4/6] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2025-08-21 1:20 ` [PATCH 1/2] fuse2fs: enable caching of iomaps Darrick J. Wong
2025-08-21 1:21 ` [PATCH 2/2] fuse2fs: be smarter about caching iomaps Darrick J. Wong
2025-08-21 0:50 ` [PATCHSET RFC v4 5/6] fuse2fs: handle timestamps and ACLs correctly when iomap is enabled Darrick J. Wong
2025-08-21 1:21 ` [PATCH 1/8] fuse2fs: skip permission checking on utimens " Darrick J. Wong
2025-08-21 1:21 ` [PATCH 2/8] fuse2fs: let the kernel tell us about acl/mode updates Darrick J. Wong
2025-08-21 1:21 ` [PATCH 3/8] fuse2fs: better debugging for file mode updates Darrick J. Wong
2025-08-21 1:22 ` [PATCH 4/8] fuse2fs: debug timestamp updates Darrick J. Wong
2025-08-21 1:22 ` [PATCH 5/8] fuse2fs: use coarse timestamps for iomap mode Darrick J. Wong
2025-08-21 1:22 ` [PATCH 6/8] fuse2fs: add tracing for retrieving timestamps Darrick J. Wong
2025-08-21 1:23 ` [PATCH 7/8] fuse2fs: enable syncfs Darrick J. Wong
2025-08-21 1:23 ` [PATCH 8/8] fuse2fs: skip the gdt write in op_destroy if syncfs is working Darrick J. Wong
2025-08-21 0:50 ` [PATCHSET RFC v4 6/6] fuse2fs: improve block and inode caching Darrick J. Wong
2025-08-21 1:23 ` [PATCH 1/6] libsupport: add caching IO manager Darrick J. Wong
2025-08-21 1:23 ` [PATCH 2/6] iocache: add the actual buffer cache Darrick J. Wong
2025-08-21 1:24 ` [PATCH 3/6] iocache: bump buffer mru priority every 50 accesses Darrick J. Wong
2025-08-21 1:24 ` [PATCH 4/6] fuse2fs: enable caching IO manager Darrick J. Wong
2025-08-21 1:24 ` [PATCH 5/6] fuse2fs: increase inode cache size Darrick J. Wong
2025-08-21 1:24 ` [PATCH 6/6] libext2fs: improve caching for inodes Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=175573713095.20753.14080863399069888459.stgit@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=John@groves.net \
--cc=amir73il@gmail.com \
--cc=bernd@bsbernd.com \
--cc=joannelkoong@gmail.com \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=neal@gompa.dev \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox