From: Nithurshen <nithurshen.dev@gmail.com>
To: linux-erofs@lists.ozlabs.org
Cc: nithurshen.dev@gmail.com, hsiangkao@linux.alibaba.com, xiang@kernel.org
Subject: [PATCH v2] fsck.erofs: implement thread-safe global LRU metadata cache
Date: Fri, 12 Jun 2026 00:16:51 +0530 [thread overview]
Message-ID: <20260611184651.89363-1-nithurshen.dev@gmail.com> (raw)
In-Reply-To: <20260611083601.81061-1-nithurshen.dev@gmail.com>
This patch introduces a thread-safe userspace metadata cache to reduce
redundant decompression cycles and the overhead of repetitive pread()
syscalls across multiple background worker threads.
To keep lookups highly concurrent for worker threads extracting
pclusters, the cache uses a bucketed hash table with a per-bucket
rw-semaphore, modeled after the existing fragment cache.
While a userspace cache inherently increases the memory footprint
compared to relying solely on the kernel's page cache, this patch
implements a global Least Recently Used (LRU) eviction policy to bound
this additional memory overhead. This prevents the cache from growing
without bound on exceptionally large EROFS images. The maximum cache
capacity is configurable via the new '--cache-size' parameter (in bytes),
which defaults to 32 MiB.
Signed-off-by: Nithurshen <nithurshen.dev@gmail.com>
---
fsck/main.c | 12 ++++
include/erofs/internal.h | 2 +
lib/data.c | 150 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 161 insertions(+), 3 deletions(-)
diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
{"no-xattrs", no_argument, 0, 14},
{"nid", required_argument, 0, 15},
{"path", required_argument, 0, 16},
+ {"cache-size", required_argument, 0, 17},
{"no-sbcrc", no_argument, 0, 512},
{0, 0, 0, 0},
};
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
" --offset=# skip # bytes at the beginning of IMAGE\n"
" --nid=# check or extract from the target inode of nid #\n"
" --path=X check or extract from the target inode of path X\n"
+ " --cache-size=# set maximum metadata cache size in bytes (default 32MB)\n"
" --no-sbcrc bypass the superblock checksum verification\n"
" --[no-]xattrs whether to dump extended attributes (default off)\n"
"\n"
@@ -261,6 +263,16 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
case 16:
fsckcfg.inode_path = optarg;
break;
+		case 17: {
+			/* --cache-size=#: upper bound of the metadata cache, in bytes */
+			unsigned long cache_size;
+			char *endptr;
+
+			/*
+			 * strtoul() reports overflow only through errno and
+			 * reports "no digits at all" only by leaving endptr at
+			 * the start of the string, so both must be checked in
+			 * addition to trailing garbage.
+			 */
+			errno = 0;
+			cache_size = strtoul(optarg, &endptr, 0);
+			if (endptr == optarg || *endptr != '\0' ||
+			    errno == ERANGE) {
+				erofs_err("invalid metadata cache size %s", optarg);
+				return -EINVAL;
+			}
+			erofs_meta_cache_set_capacity(cache_size);
+			break;
+		}
case 512:
fsckcfg.nosbcrc = true;
break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,8 @@ struct z_erofs_read_ctx {
void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
+/*
+ * Set the upper bound, in bytes, on the payload held by the global
+ * metadata cache (32 MiB unless changed).
+ * NOTE(review): the limit is stored without taking any lock; presumably
+ * this is meant to be called during option parsing, before worker
+ * threads start -- confirm.
+ */
+void erofs_meta_cache_set_capacity(unsigned long bytes);
+
int liberofs_global_init(void);
void liberofs_global_exit(void);
diff --git a/lib/data.c b/lib/data.c
index e9d2218..b8d81b3 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,84 @@ struct z_erofs_decompress_task {
unsigned int nr_reqs;
};
+/*
+ * Number of hash buckets in the metadata cache.  Must remain a power of
+ * two because META_HASH() selects a bucket by masking the low bits.
+ */
+#define META_HASHSIZE 65536
+/* Map a 64-bit cache key to a bucket index in [0, META_HASHSIZE). */
+#define META_HASH(c) ((c) & (META_HASHSIZE - 1))
+
+/* One hash bucket of the metadata cache. */
+struct erofs_meta_bucket {
+	/* chain of struct erofs_meta_item, linked through item->list */
+ struct list_head hash;
+	/* taken for read while looking up, for write while adding/removing */
+ erofs_rwsem_t lock;
+};
+
+/* One cached metadata blob. */
+struct erofs_meta_item {
+	/* linkage in the owning bucket's hash chain (bucket->hash) */
+ struct list_head list;
+	/* linkage in the global meta_lru_list; protected by meta_lru_lock */
+ struct list_head lru;
+	/* lookup key; META_HASH(key) selects the owning bucket */
+ u64 key;
+	/* heap copy of the metadata, owned by the cache and free()d on eviction */
+ char *data;
+	/* size of @data in bytes; also the amount charged to meta_cache_bytes */
+ int length;
+	/*
+	 * Set under meta_lru_lock once eviction has unlinked the item from
+	 * the LRU list; a cache hit must then not re-link it.
+	 */
+ bool evicting;
+};
+
+/* Hash table of cached metadata; each bucket carries its own rwsem. */
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+/* Set once the buckets and the LRU list have been initialised. */
+static bool meta_cache_inited;
+/* Serialises the one-time initialisation; file-local like the LRU lock. */
+static EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+/* Protects meta_lru_list, meta_cache_bytes and each item's lru/evicting. */
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+/* Most recently used item at the head, eviction candidates at the tail. */
+static struct list_head meta_lru_list;
+/* Sum of item->length over all items currently on meta_lru_list. */
+static unsigned long meta_cache_bytes;
+/* Eviction threshold in bytes (32 MiB by default). */
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+/*
+ * Set the eviction threshold of the metadata cache to @bytes.
+ *
+ * Only the limit is updated here; nothing is evicted by this call.
+ * NOTE(review): the store is not protected by meta_lru_lock -- presumably
+ * callers invoke this before any worker thread uses the cache; confirm.
+ */
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{
+ meta_cache_max_bytes = bytes;
+}
+
+/*
+ * One-time setup of the metadata cache: every bucket's chain and rwsem
+ * plus the global LRU list.  Safe to call repeatedly and from several
+ * threads; all calls after the first return without touching anything.
+ */
+static void erofs_meta_cache_init(void)
+{
+	struct erofs_meta_bucket *bk;
+
+	erofs_mutex_lock(&meta_cache_init_lock);
+	if (!meta_cache_inited) {
+		for (bk = meta_bks; bk < meta_bks + META_HASHSIZE; ++bk) {
+			init_list_head(&bk->hash);
+			erofs_init_rwsem(&bk->lock);
+		}
+		init_list_head(&meta_lru_list);
+		/* publish only after everything above is usable */
+		meta_cache_inited = true;
+	}
+	erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+/*
+ * Drop least recently used items until the cache is back under
+ * meta_cache_max_bytes or the LRU list is empty.
+ *
+ * meta_lru_lock is never held while a bucket lock is taken: each victim
+ * is first detached from the LRU list, then the LRU lock is dropped, and
+ * only then is the victim unhashed under its bucket's write lock.
+ */
+static void erofs_meta_cache_evict(void)
+{
+	erofs_mutex_lock(&meta_lru_lock);
+	for (;;) {
+		struct erofs_meta_bucket *bucket;
+		struct erofs_meta_item *victim;
+
+		if (meta_cache_bytes <= meta_cache_max_bytes ||
+		    list_empty(&meta_lru_list))
+			break;
+
+		/* the tail of the list is the least recently used item */
+		victim = list_last_entry(&meta_lru_list,
+					 struct erofs_meta_item, lru);
+		/* tell concurrent cache hits not to put it back on the LRU */
+		victim->evicting = true;
+		list_del(&victim->lru);
+		init_list_head(&victim->lru);
+		meta_cache_bytes -= victim->length;
+		erofs_mutex_unlock(&meta_lru_lock);
+
+		/* readers of this bucket are drained by the write lock */
+		bucket = &meta_bks[META_HASH(victim->key)];
+		erofs_down_write(&bucket->lock);
+		list_del(&victim->list);
+		erofs_up_write(&bucket->lock);
+
+		free(victim->data);
+		free(victim);
+
+		erofs_mutex_lock(&meta_lru_lock);
+	}
+	erofs_mutex_unlock(&meta_lru_lock);
+}
+
static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
{
struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task *)work;
@@ -604,7 +682,73 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
+/*
+ * Cache entry as allocated by erofs_read_metadata().
+ *
+ * struct erofs_meta_item only carries a 64-bit key, which cannot tell two
+ * different (sbi, nid, offset) requests apart.  The generic item is
+ * therefore embedded as the FIRST member, so that eviction, which only
+ * knows about the item and free()s it, releases the whole entry, and the
+ * full identity of the request is kept next to it.
+ */
+struct erofs_meta_entry {
+	struct erofs_meta_item item;
+	struct erofs_sb_info *sbi;	/* filesystem the blob was read from */
+	erofs_nid_t nid;		/* inode it was read from, 0 for the bdi */
+	erofs_off_t start;		/* caller's *offset before the read */
+	erofs_off_t end;		/* caller's *offset after the read */
+};
+
+/*
+ * Find the entry for (sbi, nid, start) in @bk, or NULL if there is none.
+ * The caller must hold bk->lock (read or write).
+ */
+static struct erofs_meta_entry *
+erofs_meta_cache_find(struct erofs_meta_bucket *bk, u64 key,
+		      struct erofs_sb_info *sbi, erofs_nid_t nid,
+		      erofs_off_t start)
+{
+	struct erofs_meta_item *item;
+
+	list_for_each_entry(item, &bk->hash, list) {
+		struct erofs_meta_entry *ent =
+			container_of(item, struct erofs_meta_entry, item);
+
+		if (item->key == key && ent->sbi == sbi &&
+		    ent->nid == nid && ent->start == start)
+			return ent;
+	}
+	return NULL;
+}
+
+/*
+ * Read one length-prefixed metadata blob, going through the global
+ * metadata cache.
+ *
+ * @sbi:     filesystem instance
+ * @nid:     inode to read from, or 0 to read from the block device
+ * @offset:  in: position of the blob; out: position just past it
+ * @lengthp: out: size of the returned blob in bytes
+ *
+ * Returns a malloc()ed buffer owned by the caller (never a pointer into
+ * the cache), or whatever the underlying reader returned on failure.
+ * Caching is best effort: allocation failures only disable caching for
+ * this call.
+ *
+ * NOTE(review): entries are never purged when an sbi goes away, so a
+ * recycled sbi address could match stale entries -- confirm lifetime.
+ */
void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
 erofs_off_t *offset, int *lengthp)
{
+	const erofs_off_t start = *offset;
+	/* the key only picks a bucket; identity is checked field by field */
+	const u64 key = start ^ ((u64)nid * 0x9E3779B97F4A7C15ULL);
+	struct erofs_meta_bucket *bk;
+	struct erofs_meta_entry *ent;
+	void *buffer = NULL;
+	bool over;
+
+	/*
+	 * Always go through the init mutex instead of peeking at
+	 * meta_cache_inited without synchronisation; the call is a no-op
+	 * once the cache has been set up.
+	 */
+	erofs_meta_cache_init();
+
+	bk = &meta_bks[META_HASH(key)];
+
+	erofs_down_read(&bk->lock);
+	ent = erofs_meta_cache_find(bk, key, sbi, nid, start);
+	if (ent) {
+		buffer = malloc(ent->item.length);
+		if (buffer) {
+			memcpy(buffer, ent->item.data, ent->item.length);
+			*lengthp = ent->item.length;
+			/* replay the position the real read ended at */
+			*offset = ent->end;
+
+			erofs_mutex_lock(&meta_lru_lock);
+			if (!ent->item.evicting) {
+				list_del(&ent->item.lru);
+				list_add(&ent->item.lru, &meta_lru_list);
+			}
+			erofs_mutex_unlock(&meta_lru_lock);
+		}
+	}
+	erofs_up_read(&bk->lock);
+
+	if (buffer)
+		return buffer;
+
if (nid)
- return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
- return erofs_read_metadata_bdi(sbi, offset, lengthp);
-}
+		buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+	else
+		buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+	if (!buffer || IS_ERR(buffer) || *lengthp <= 0)
+		return buffer;
+
+	ent = malloc(sizeof(*ent));
+	if (!ent)
+		return buffer;
+	ent->item.data = malloc(*lengthp);
+	if (!ent->item.data) {
+		free(ent);
+		return buffer;
+	}
+	memcpy(ent->item.data, buffer, *lengthp);
+	ent->item.key = key;
+	ent->item.length = *lengthp;
+	ent->item.evicting = false;
+	ent->sbi = sbi;
+	ent->nid = nid;
+	ent->start = start;
+	ent->end = *offset;
+
+	erofs_down_write(&bk->lock);
+	if (erofs_meta_cache_find(bk, key, sbi, nid, start)) {
+		/* another thread cached the same blob while we were reading */
+		erofs_up_write(&bk->lock);
+		free(ent->item.data);
+		free(ent);
+		return buffer;
+	}
+	/*
+	 * Link into the hash chain and the LRU list while still holding the
+	 * bucket write lock: a concurrent hit must never see an entry whose
+	 * lru node is not yet on the list, and eviction (which needs this
+	 * bucket's write lock to unhash) must never see one that is not yet
+	 * hashed.
+	 */
+	list_add_tail(&ent->item.list, &bk->hash);
+	erofs_mutex_lock(&meta_lru_lock);
+	list_add(&ent->item.lru, &meta_lru_list);
+	meta_cache_bytes += ent->item.length;
+	over = meta_cache_bytes > meta_cache_max_bytes;
+	erofs_mutex_unlock(&meta_lru_lock);
+	erofs_up_write(&bk->lock);
+	/* ent may already have been evicted and freed; do not touch it */
+
+	if (over)
+		erofs_meta_cache_evict();
+	return buffer;
+}
\ No newline at end of file
--
2.52.0
prev parent reply other threads:[~2026-06-11 18:47 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-11 8:36 [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache Nithurshen
2026-06-11 9:15 ` Gao Xiang
2026-06-11 18:46 ` Nithurshen [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260611184651.89363-1-nithurshen.dev@gmail.com \
--to=nithurshen.dev@gmail.com \
--cc=hsiangkao@linux.alibaba.com \
--cc=linux-erofs@lists.ozlabs.org \
--cc=xiang@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.