All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nithurshen <nithurshen.dev@gmail.com>
To: linux-erofs@lists.ozlabs.org
Cc: xiang@kernel.org, hsiangkao@linux.alibaba.com,
	Nithurshen <nithurshen.dev@gmail.com>
Subject: [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache
Date: Thu, 11 Jun 2026 14:06:01 +0530	[thread overview]
Message-ID: <20260611083601.81061-1-nithurshen.dev@gmail.com> (raw)

This patch introduces a thread-safe metadata cache to reduce redundant
I/O and decompression overhead during fsck extraction.

To ensure it remains highly concurrent for worker threads extracting
pclusters, the cache utilizes a bucketed, rw-semaphore protected
architecture modeled after the existing fragment cache.

Furthermore, to prevent out-of-memory (OOM) scenarios on exceptionally
large EROFS images, the cache implements a global Least Recently Used
(LRU) eviction policy. The maximum cache size can be set, in bytes, with
the new '--cache-size' option; it defaults to 32 MiB.

Signed-off-by: Nithurshen <nithurshen.dev@gmail.com>
---
 fsck/main.c              |  17 ++++
 include/erofs/internal.h |   3 +
 lib/data.c               | 198 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 215 insertions(+), 3 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
 	{"no-xattrs", no_argument, 0, 14},
 	{"nid", required_argument, 0, 15},
 	{"path", required_argument, 0, 16},
+	{"cache-size", required_argument, 0, 17},
 	{"no-sbcrc", no_argument, 0, 512},
 	{0, 0, 0, 0},
 };
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
 		" --offset=#             skip # bytes at the beginning of IMAGE\n"
 		" --nid=#                check or extract from the target inode of nid #\n"
 		" --path=X               check or extract from the target inode of path X\n"
+		" --cache-size=#         set maximum metadata cache size in bytes (default 32MiB)\n"
 		" --no-sbcrc             bypass the superblock checksum verification\n"
 		" --[no-]xattrs          whether to dump extended attributes (default off)\n"
 		"\n"
@@ -261,6 +263,21 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 		case 16:
 			fsckcfg.inode_path = optarg;
 			break;
+		case 17: {
+			unsigned long cache_size;
+			char *endptr;
+
+			errno = 0;
+			cache_size = strtoul(optarg, &endptr, 0);
+			/* strtoul() accepts "" and silently wraps negative input */
+			if (errno || endptr == optarg || *endptr != '\0' ||
+			    strchr(optarg, '-')) {
+				erofs_err("invalid metadata cache size %s", optarg);
+				return -EINVAL;
+			}
+			erofs_meta_cache_set_capacity(cache_size);
+			break;
+		}
 		case 512:
 			fsckcfg.nosbcrc = true;
 			break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,9 @@ struct z_erofs_read_ctx {
 
 void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
 
+/* cap, in bytes, on the records kept by the metadata cache in lib/data.c */
+void erofs_meta_cache_set_capacity(unsigned long bytes);
+
 int liberofs_global_init(void);
 void liberofs_global_exit(void);
 
diff --git a/lib/data.c b/lib/data.c
index e9d2218..9acf2bf 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,162 @@ struct z_erofs_decompress_task {
 	unsigned int nr_reqs;
 };
 
+#define META_HASHSIZE		65536
+#define META_HASH(c)		((c) & (META_HASHSIZE - 1))
+
+struct erofs_meta_bucket {
+	struct list_head hash;
+	erofs_rwsem_t lock;	/* guards @hash and the items linked on it */
+};
+
+/*
+ * One cached metadata record.  @list is guarded by the bucket lock, @lru
+ * and @evicting by meta_lru_lock; the other fields are set before the
+ * item is published and never change afterwards.
+ */
+struct erofs_meta_item {
+	struct list_head list;
+	struct list_head lru;
+	struct erofs_sb_info *sbi;
+	erofs_nid_t nid;	/* 0 if the record was read from the device */
+	erofs_off_t offset;	/* offset the caller passed in */
+	char *data;
+	int length;
+	bool evicting;		/* already off the LRU list, about to be freed */
+};
+
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+static bool meta_cache_inited;
+static EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+/* meta_lru_lock guards the LRU list and both byte counters below */
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+static struct list_head meta_lru_list;
+static unsigned long meta_cache_bytes;
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+/* Set the maximum number of payload bytes the metadata cache may hold. */
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{
+	erofs_mutex_lock(&meta_lru_lock);
+	meta_cache_max_bytes = bytes;
+	erofs_mutex_unlock(&meta_lru_lock);
+}
+
+/* Initialize the buckets and the LRU list once; later calls do nothing. */
+static void erofs_meta_cache_init(void)
+{
+	int i;
+
+	erofs_mutex_lock(&meta_cache_init_lock);
+	if (!meta_cache_inited) {
+		for (i = 0; i < META_HASHSIZE; ++i) {
+			init_list_head(&meta_bks[i].hash);
+			erofs_init_rwsem(&meta_bks[i].lock);
+		}
+		init_list_head(&meta_lru_list);
+		meta_cache_inited = true;
+	}
+	erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+/* Bucket index of a record; @nid alone cannot tell records apart. */
+static unsigned int erofs_meta_hash(erofs_nid_t nid, erofs_off_t offset)
+{
+	return META_HASH((u64)nid * 31 + offset);
+}
+
+/* Look up a record in @bk; the caller must hold bk->lock. */
+static struct erofs_meta_item *
+erofs_meta_cache_find(struct erofs_meta_bucket *bk, struct erofs_sb_info *sbi,
+		      erofs_nid_t nid, erofs_off_t offset)
+{
+	struct erofs_meta_item *item;
+
+	list_for_each_entry(item, &bk->hash, list) {
+		if (item->sbi == sbi && item->nid == nid &&
+		    item->offset == offset)
+			return item;
+	}
+	return NULL;
+}
+
+/* Free least recently used records until the cache fits its capacity. */
+static void erofs_meta_cache_evict(void)
+{
+	struct erofs_meta_item *item;
+	struct erofs_meta_bucket *bk;
+
+	erofs_mutex_lock(&meta_lru_lock);
+	while (meta_cache_bytes > meta_cache_max_bytes &&
+	       !list_empty(&meta_lru_list)) {
+		item = list_last_entry(&meta_lru_list,
+				       struct erofs_meta_item, lru);
+		/* lookups that still find it hashed must not relink it */
+		item->evicting = true;
+		list_del(&item->lru);
+		meta_cache_bytes -= item->length;
+		erofs_mutex_unlock(&meta_lru_lock);
+
+		/* the write lock waits for readers still copying item->data */
+		bk = &meta_bks[erofs_meta_hash(item->nid, item->offset)];
+		erofs_down_write(&bk->lock);
+		list_del(&item->list);
+		erofs_up_write(&bk->lock);
+
+		free(item->data);
+		free(item);
+		erofs_mutex_lock(&meta_lru_lock);
+	}
+	erofs_mutex_unlock(&meta_lru_lock);
+}
+
+/*
+ * Cache a private copy of @buf.  Best effort: nothing is cached if memory
+ * is short or if the same record is already present in the bucket.
+ */
+static void erofs_meta_cache_insert(struct erofs_sb_info *sbi,
+				    erofs_nid_t nid, erofs_off_t offset,
+				    const void *buf, int length)
+{
+	struct erofs_meta_bucket *bk = &meta_bks[erofs_meta_hash(nid, offset)];
+	struct erofs_meta_item *item = malloc(sizeof(*item));
+	bool full;
+
+	if (!item)
+		return;
+	item->data = malloc(length);
+	if (!item->data)
+		goto out_free;
+	memcpy(item->data, buf, length);
+	item->sbi = sbi;
+	item->nid = nid;
+	item->offset = offset;
+	item->length = length;
+	item->evicting = false;
+
+	erofs_down_write(&bk->lock);
+	if (erofs_meta_cache_find(bk, sbi, nid, offset)) {
+		erofs_up_write(&bk->lock);
+		goto out_free;
+	}
+	/* link both lists before any lookup can reach the item */
+	erofs_mutex_lock(&meta_lru_lock);
+	list_add_tail(&item->list, &bk->hash);
+	list_add(&item->lru, &meta_lru_list);
+	meta_cache_bytes += length;
+	full = meta_cache_bytes > meta_cache_max_bytes;
+	erofs_mutex_unlock(&meta_lru_lock);
+	erofs_up_write(&bk->lock);
+
+	if (full)
+		erofs_meta_cache_evict();
+	return;
+out_free:
+	free(item->data);
+	free(item);
+}
+
 static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
 {
 	struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task *)work;
@@ -604,7 +760,43 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
 void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
 			  erofs_off_t *offset, int *lengthp)
 {
+	const erofs_off_t start = *offset;
+	struct erofs_meta_bucket *bk;
+	struct erofs_meta_item *item;
+	void *buffer = NULL;
+
+	if (__erofs_unlikely(!meta_cache_inited))
+		erofs_meta_cache_init();
+
+	bk = &meta_bks[erofs_meta_hash(nid, start)];
+	erofs_down_read(&bk->lock);
+	item = erofs_meta_cache_find(bk, sbi, nid, start);
+	if (item)
+		buffer = malloc(item->length);
+	if (buffer) {
+		memcpy(buffer, item->data, item->length);
+		*lengthp = item->length;
+		/* step over the 4-byte aligned length field and the payload */
+		*offset = round_up(start, 4) + sizeof(__le16) + item->length;
+
+		erofs_mutex_lock(&meta_lru_lock);
+		if (!item->evicting) {
+			list_del(&item->lru);
+			list_add(&item->lru, &meta_lru_list);
+		}
+		erofs_mutex_unlock(&meta_lru_lock);
+	}
+	erofs_up_read(&bk->lock);
+	if (buffer)
+		return buffer;
+
 	if (nid)
-		return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
-	return erofs_read_metadata_bdi(sbi, offset, lengthp);
-}
+		buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+	else
+		buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+	/* caching is best effort; the caller still owns @buffer */
+	if (buffer && !IS_ERR(buffer))
+		erofs_meta_cache_insert(sbi, nid, start, buffer, *lengthp);
+	return buffer;
+}
-- 
2.52.0



             reply	other threads:[~2026-06-11  8:36 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-11  8:36 Nithurshen [this message]
2026-06-11  9:15 ` [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache Gao Xiang
2026-06-11 18:46 ` [PATCH v2] " Nithurshen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260611083601.81061-1-nithurshen.dev@gmail.com \
    --to=nithurshen.dev@gmail.com \
    --cc=hsiangkao@linux.alibaba.com \
    --cc=linux-erofs@lists.ozlabs.org \
    --cc=xiang@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.