From: Nithurshen <nithurshen.dev@gmail.com>
To: linux-erofs@lists.ozlabs.org
Cc: xiang@kernel.org, hsiangkao@linux.alibaba.com,
Nithurshen <nithurshen.dev@gmail.com>
Subject: [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache
Date: Thu, 11 Jun 2026 14:06:01 +0530 [thread overview]
Message-ID: <20260611083601.81061-1-nithurshen.dev@gmail.com> (raw)
This patch introduces a thread-safe metadata cache to reduce redundant
I/O and decompression overhead during fsck extraction.
To keep lookups concurrent for the worker threads extracting pclusters,
the cache is split into hash buckets, each protected by its own
rw-semaphore, modeled after the existing fragment cache.
Furthermore, to prevent out-of-memory (OOM) conditions on exceptionally
large EROFS images, the cache enforces a strict global Least Recently
Used (LRU) eviction policy. The maximum cache size can be set in bytes
with the new '--cache-size' option and defaults to 32 MB
(32 * 1024 * 1024 bytes).
Signed-off-by: Nithurshen <nithurshen.dev@gmail.com>
---
fsck/main.c | 12 ++++
include/erofs/internal.h | 2 +
lib/data.c | 149 ++++++++++++++++++++++++++++++++++++++-
3 files changed, 160 insertions(+), 3 deletions(-)
diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
{"no-xattrs", no_argument, 0, 14},
{"nid", required_argument, 0, 15},
{"path", required_argument, 0, 16},
+ {"cache-size", required_argument, 0, 17},
{"no-sbcrc", no_argument, 0, 512},
{0, 0, 0, 0},
};
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
" --offset=# skip # bytes at the beginning of IMAGE\n"
" --nid=# check or extract from the target inode of nid #\n"
" --path=X check or extract from the target inode of path X\n"
+ " --cache-size=# set maximum metadata cache size in bytes (default 32MB)\n"
" --no-sbcrc bypass the superblock checksum verification\n"
" --[no-]xattrs whether to dump extended attributes (default off)\n"
"\n"
@@ -261,6 +263,16 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
case 16:
fsckcfg.inode_path = optarg;
break;
+ case 17: {
+ char *endptr;
+ unsigned long cache_size = strtoul(optarg, &endptr, 0);
+ if (*endptr != '\0') {
+ erofs_err("invalid metadata cache size %s", optarg);
+ return -EINVAL;
+ }
+ erofs_meta_cache_set_capacity(cache_size);
+ break;
+ }
case 512:
fsckcfg.nosbcrc = true;
break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,8 @@ struct z_erofs_read_ctx {
void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
+void erofs_meta_cache_set_capacity(unsigned long bytes);
+
int liberofs_global_init(void);
void liberofs_global_exit(void);
diff --git a/lib/data.c b/lib/data.c
index e9d2218..9acf2bf 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,84 @@ struct z_erofs_decompress_task {
unsigned int nr_reqs;
};
+#define META_HASHSIZE 65536
+#define META_HASH(c) ((c) & (META_HASHSIZE - 1))
+
+struct erofs_meta_bucket {
+ struct list_head hash;
+ erofs_rwsem_t lock;
+};
+
+struct erofs_meta_item {
+ struct list_head list;
+ struct list_head lru;
+ u64 key;
+ char *data;
+ int length;
+ bool evicting;
+};
+
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+static bool meta_cache_inited = false;
+EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+static struct list_head meta_lru_list;
+static unsigned long meta_cache_bytes = 0;
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{
+ meta_cache_max_bytes = bytes;
+}
+
+static void erofs_meta_cache_init(void)
+{
+ int i;
+
+ erofs_mutex_lock(&meta_cache_init_lock);
+ if (meta_cache_inited) {
+ erofs_mutex_unlock(&meta_cache_init_lock);
+ return;
+ }
+
+ for (i = 0; i < META_HASHSIZE; ++i) {
+ init_list_head(&meta_bks[i].hash);
+ erofs_init_rwsem(&meta_bks[i].lock);
+ }
+ init_list_head(&meta_lru_list);
+ meta_cache_inited = true;
+ erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+static void erofs_meta_cache_evict(void)
+{
+ struct erofs_meta_item *item;
+ struct erofs_meta_bucket *bk;
+
+ erofs_mutex_lock(&meta_lru_lock);
+ while (meta_cache_bytes > meta_cache_max_bytes && !list_empty(&meta_lru_list)) {
+ /* Get the least recently used item (tail of the list) */
+ item = list_last_entry(&meta_lru_list, struct erofs_meta_item, lru);
+ item->evicting = true; /* Mark it dead to block cache hits from resurrecting it */
+ list_del(&item->lru);
+ init_list_head(&item->lru);
+ meta_cache_bytes -= item->length;
+ erofs_mutex_unlock(&meta_lru_lock);
+
+ bk = &meta_bks[META_HASH(item->key)];
+ erofs_down_write(&bk->lock);
+ list_del(&item->list);
+ erofs_up_write(&bk->lock);
+
+ free(item->data);
+ free(item);
+
+ erofs_mutex_lock(&meta_lru_lock);
+ }
+ erofs_mutex_unlock(&meta_lru_lock);
+}
+
static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
{
struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task *)work;
@@ -604,7 +682,72 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
erofs_off_t *offset, int *lengthp)
{
+ u64 key = nid ? nid : *offset;
+ struct erofs_meta_bucket *bk;
+ struct erofs_meta_item *item;
+ void *buffer = NULL;
+
+ if (__erofs_unlikely(!meta_cache_inited))
+ erofs_meta_cache_init();
+
+ bk = &meta_bks[META_HASH(key)];
+
+ erofs_down_read(&bk->lock);
+ list_for_each_entry(item, &bk->hash, list) {
+ if (item->key == key) {
+ buffer = malloc(item->length);
+ if (buffer) {
+ memcpy(buffer, item->data, item->length);
+ *lengthp = item->length;
+ *offset = round_up(*offset, 4);
+ *offset += sizeof(__le16) + item->length;
+
+ erofs_mutex_lock(&meta_lru_lock);
+ if (!item->evicting)
+ list_del(&item->lru);
+ list_add(&item->lru, &meta_lru_list);
+ erofs_mutex_unlock(&meta_lru_lock);
+ }
+ break;
+ }
+ }
+ erofs_up_read(&bk->lock);
+
+ if (buffer)
+ return buffer;
+
if (nid)
- return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
- return erofs_read_metadata_bdi(sbi, offset, lengthp);
-}
+ buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+ else
+ buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+ if (IS_ERR(buffer))
+ return buffer;
+
+ item = malloc(sizeof(*item));
+ if (item) {
+ item->key = key;
+ item->length = *lengthp;
+ item->evicting = false;
+ item->data = malloc(*lengthp);
+ if (item->data) {
+ memcpy(item->data, buffer, *lengthp);
+
+ erofs_down_write(&bk->lock);
+ list_add_tail(&item->list, &bk->hash);
+ erofs_up_write(&bk->lock);
+
+ erofs_mutex_lock(&meta_lru_lock);
+ list_add(&item->lru, &meta_lru_list);
+ meta_cache_bytes += *lengthp;
+ erofs_mutex_unlock(&meta_lru_lock);
+
+ if (meta_cache_bytes > meta_cache_max_bytes)
+ erofs_meta_cache_evict();
+ } else {
+ free(item);
+ }
+ }
+
+ return buffer;
+}
\ No newline at end of file
--
2.52.0
next reply other threads:[~2026-06-11 8:36 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-11 8:36 Nithurshen [this message]
2026-06-11 9:15 ` [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache Gao Xiang
2026-06-13 10:10 ` Nithurshen
2026-06-15 1:48 ` Gao Xiang
2026-06-15 2:45 ` Nithurshen
2026-06-11 18:46 ` [PATCH v2] " Nithurshen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260611083601.81061-1-nithurshen.dev@gmail.com \
--to=nithurshen.dev@gmail.com \
--cc=hsiangkao@linux.alibaba.com \
--cc=linux-erofs@lists.ozlabs.org \
--cc=xiang@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.