All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache
@ 2026-06-11  8:36 Nithurshen
  2026-06-11  9:15 ` Gao Xiang
  2026-06-11 18:46 ` [PATCH v2] " Nithurshen
  0 siblings, 2 replies; 3+ messages in thread
From: Nithurshen @ 2026-06-11  8:36 UTC (permalink / raw)
  To: linux-erofs; +Cc: xiang, hsiangkao, Nithurshen

This patch introduces a thread-safe metadata cache to reduce redundant
I/O and decompression overhead during fsck extraction.

To ensure it remains highly concurrent for worker threads extracting
pclusters, the cache utilizes a bucketed, rw-semaphore protected
architecture modeled after the existing fragment cache.

Furthermore, to prevent out-of-memory (OOM) conditions on exceptionally
large EROFS images, the cache enforces a global least-recently-used (LRU)
eviction policy. The maximum cache size, in bytes, can be set at startup
with the new '--cache-size' option and defaults to 32 MiB.

Signed-off-by: Nithurshen <nithurshen.dev@gmail.com>
---
 fsck/main.c              |  12 ++++
 include/erofs/internal.h |   2 +
 lib/data.c               | 149 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
 	{"no-xattrs", no_argument, 0, 14},
 	{"nid", required_argument, 0, 15},
 	{"path", required_argument, 0, 16},
+	{"cache-size", required_argument, 0, 17},	/* handled by "case 17" in erofsfsck_parse_options_cfg() */
 	{"no-sbcrc", no_argument, 0, 512},
 	{0, 0, 0, 0},
 };
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
 		" --offset=#             skip # bytes at the beginning of IMAGE\n"
 		" --nid=#                check or extract from the target inode of nid #\n"
 		" --path=X               check or extract from the target inode of path X\n"
+		" --cache-size=#         set maximum metadata cache size in bytes (default 32MB)\n"
 		" --no-sbcrc             bypass the superblock checksum verification\n"
 		" --[no-]xattrs          whether to dump extended attributes (default off)\n"
 		"\n"
@@ -261,6 +263,26 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 		case 16:
 			fsckcfg.inode_path = optarg;
 			break;
+		case 17: {
+			unsigned long cache_size;
+			char *endptr;
+
+			/*
+			 * strtoul() silently accepts an empty string, leading
+			 * whitespace and a sign ("-1" wraps to ULONG_MAX), and
+			 * reports overflow only through errno, so require a
+			 * leading digit and check ERANGE explicitly.
+			 */
+			errno = 0;
+			cache_size = strtoul(optarg, &endptr, 0);
+			if (optarg[0] < '0' || optarg[0] > '9' ||
+			    *endptr != '\0' || errno == ERANGE) {
+				erofs_err("invalid metadata cache size %s", optarg);
+				return -EINVAL;
+			}
+			erofs_meta_cache_set_capacity(cache_size);
+			break;
+		}
 		case 512:
 			fsckcfg.nosbcrc = true;
 			break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,8 @@ struct z_erofs_read_ctx {
 
 void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
 
+void erofs_meta_cache_set_capacity(unsigned long bytes);	/* upper bound, in bytes, of cached metadata payload */
+
 int liberofs_global_init(void);
 void liberofs_global_exit(void);
 
diff --git a/lib/data.c b/lib/data.c
index e9d2218..9acf2bf 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,221 @@ struct z_erofs_decompress_task {
 	unsigned int nr_reqs;
 };
 
+#define META_HASHSIZE		65536
+
+/* One hash chain; ->lock guards the chain and the payload of its items. */
+struct erofs_meta_bucket {
+	struct list_head hash;
+	erofs_rwsem_t lock;
+};
+
+/*
+ * One cached metadata blob, identified by the (sbi, nid, offset) triple that
+ * was passed to erofs_read_metadata().  All three are compared on lookup:
+ * several blobs can live at different offsets of one nid, and the cache is
+ * shared by every image opened in the process.
+ *
+ * NOTE(review): items are never invalidated, so an sbi that is freed and
+ * reallocated at the same address would hit stale entries; confirm the sbi
+ * lifetime or add a flush hook.
+ */
+struct erofs_meta_item {
+	struct list_head list;	/* bucket chain, under the bucket lock */
+	struct list_head lru;	/* global LRU, under meta_lru_lock */
+	struct erofs_sb_info *sbi;
+	erofs_nid_t nid;
+	erofs_off_t offset;
+	char *data;
+	int length;
+	bool evicting;		/* set under meta_lru_lock, never cleared */
+};
+
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+static bool meta_cache_inited;
+static EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+static struct list_head meta_lru_list;
+static unsigned long meta_cache_bytes;
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+/* Map a (nid, offset) pair to a bucket index. */
+static inline unsigned int erofs_meta_hash(erofs_nid_t nid, erofs_off_t offset)
+{
+	u64 h = (u64)nid * 0x9E3779B97F4A7C15ULL + (u64)offset;
+
+	h ^= h >> 32;
+	h ^= h >> 16;
+	return h & (META_HASHSIZE - 1);
+}
+
+/*
+ * Set the upper bound, in bytes, of payload kept in the metadata cache.
+ * The value is stored without synchronization, so call this before any
+ * thread starts reading metadata.  Items that are already cached are only
+ * trimmed by the next insertion.
+ */
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{
+	meta_cache_max_bytes = bytes;
+}
+
+/* Set up the buckets and the LRU list once; safe to call concurrently. */
+static void erofs_meta_cache_init(void)
+{
+	int i;
+
+	erofs_mutex_lock(&meta_cache_init_lock);
+	if (!meta_cache_inited) {
+		for (i = 0; i < META_HASHSIZE; ++i) {
+			init_list_head(&meta_bks[i].hash);
+			erofs_init_rwsem(&meta_bks[i].lock);
+		}
+		init_list_head(&meta_lru_list);
+		/* pairs with the acquire load in erofs_read_metadata() */
+		__atomic_store_n(&meta_cache_inited, true, __ATOMIC_RELEASE);
+	}
+	erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+/* Look up (sbi, nid, offset) in @bk; the caller must hold bk->lock. */
+static struct erofs_meta_item *
+erofs_meta_cache_find(struct erofs_meta_bucket *bk, struct erofs_sb_info *sbi,
+		      erofs_nid_t nid, erofs_off_t offset)
+{
+	struct erofs_meta_item *item;
+
+	list_for_each_entry(item, &bk->hash, list) {
+		if (item->sbi == sbi && item->nid == nid &&
+		    item->offset == offset)
+			return item;
+	}
+	return NULL;
+}
+
+/*
+ * Drop least-recently-used items until the cache fits its capacity again.
+ *
+ * An item is flagged ->evicting and unlinked from the LRU under
+ * meta_lru_lock, then unhashed under its bucket's write lock (which waits
+ * for readers still copying the payload) and only then freed.  The two
+ * locks are never held together here, so this cannot deadlock against the
+ * paths that take the bucket lock first and meta_lru_lock second.
+ */
+static void erofs_meta_cache_evict(void)
+{
+	struct erofs_meta_item *item;
+	struct erofs_meta_bucket *bk;
+
+	erofs_mutex_lock(&meta_lru_lock);
+	while (meta_cache_bytes > meta_cache_max_bytes &&
+	       !list_empty(&meta_lru_list)) {
+		/* hits are re-added at the head, so the tail is the coldest */
+		item = list_last_entry(&meta_lru_list, struct erofs_meta_item, lru);
+		item->evicting = true;
+		list_del(&item->lru);
+		init_list_head(&item->lru);
+		meta_cache_bytes -= item->length;
+		erofs_mutex_unlock(&meta_lru_lock);
+
+		bk = &meta_bks[erofs_meta_hash(item->nid, item->offset)];
+		erofs_down_write(&bk->lock);
+		list_del(&item->list);
+		erofs_up_write(&bk->lock);
+
+		free(item->data);
+		free(item);
+
+		erofs_mutex_lock(&meta_lru_lock);
+	}
+	erofs_mutex_unlock(&meta_lru_lock);
+}
+
+/*
+ * Return a freshly allocated copy of the cached blob and mark it most
+ * recently used.  Returns NULL on a miss or when the copy cannot be
+ * allocated; the caller then reads the image instead.
+ */
+static void *erofs_meta_cache_get(struct erofs_sb_info *sbi, erofs_nid_t nid,
+				  erofs_off_t offset, int *lengthp)
+{
+	struct erofs_meta_bucket *bk = &meta_bks[erofs_meta_hash(nid, offset)];
+	struct erofs_meta_item *item;
+	void *buffer = NULL;
+
+	erofs_down_read(&bk->lock);
+	item = erofs_meta_cache_find(bk, sbi, nid, offset);
+	if (item)
+		buffer = malloc(item->length);
+	if (buffer) {
+		memcpy(buffer, item->data, item->length);
+		*lengthp = item->length;
+
+		erofs_mutex_lock(&meta_lru_lock);
+		/* an evicting item has left the LRU and is about to be freed */
+		if (!item->evicting) {
+			list_del(&item->lru);
+			list_add(&item->lru, &meta_lru_list);
+		}
+		erofs_mutex_unlock(&meta_lru_lock);
+	}
+	erofs_up_read(&bk->lock);
+	return buffer;
+}
+
+/*
+ * Cache a private copy of @buffer.  Caching is best effort: an allocation
+ * failure or losing the race against another inserter of the same blob is
+ * silently ignored.
+ */
+static void erofs_meta_cache_put(struct erofs_sb_info *sbi, erofs_nid_t nid,
+				 erofs_off_t offset, const void *buffer,
+				 int length)
+{
+	struct erofs_meta_bucket *bk = &meta_bks[erofs_meta_hash(nid, offset)];
+	struct erofs_meta_item *item;
+	bool inserted = false;
+
+	item = malloc(sizeof(*item));
+	if (!item)
+		return;
+	item->data = malloc(length);
+	if (!item->data) {
+		free(item);
+		return;
+	}
+	memcpy(item->data, buffer, length);
+	item->sbi = sbi;
+	item->nid = nid;
+	item->offset = offset;
+	item->length = length;
+	item->evicting = false;
+
+	erofs_down_write(&bk->lock);
+	if (!erofs_meta_cache_find(bk, sbi, nid, offset)) {
+		/*
+		 * Link into the LRU while the bucket is still write-locked:
+		 * a hit moves the item on the LRU as soon as it can see it
+		 * in the bucket, so ->lru must be on the list by then.
+		 */
+		erofs_mutex_lock(&meta_lru_lock);
+		list_add(&item->lru, &meta_lru_list);
+		meta_cache_bytes += length;
+		erofs_mutex_unlock(&meta_lru_lock);
+		list_add_tail(&item->list, &bk->hash);
+		inserted = true;
+	}
+	erofs_up_write(&bk->lock);
+
+	if (!inserted) {
+		free(item->data);
+		free(item);
+		return;
+	}
+	/* the budget is re-checked there under meta_lru_lock */
+	erofs_meta_cache_evict();
+}
+
 static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
 {
 	struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task *)work;
@@ -604,7 +819,31 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
 void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
 			  erofs_off_t *offset, int *lengthp)
 {
+	const erofs_off_t start = *offset;
+	void *buffer;
+
+	if (__erofs_unlikely(!__atomic_load_n(&meta_cache_inited,
+					      __ATOMIC_ACQUIRE)))
+		erofs_meta_cache_init();
+
+	buffer = erofs_meta_cache_get(sbi, nid, start, lengthp);
+	if (buffer) {
+		/*
+		 * Advance the cursor past the blob: align to 4, then skip
+		 * the __le16 length prefix and the payload.
+		 * NOTE(review): must stay in sync with the uncached readers.
+		 */
+		*offset = round_up(start, 4) + sizeof(__le16) + *lengthp;
+		return buffer;
+	}
+
 	if (nid)
-		return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
-	return erofs_read_metadata_bdi(sbi, offset, lengthp);
+		buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+	else
+		buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+	/* key on the offset the caller passed in, not the advanced cursor */
+	if (!IS_ERR(buffer))
+		erofs_meta_cache_put(sbi, nid, start, buffer, *lengthp);
+	return buffer;
 }
-- 
2.52.0



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2026-06-11 18:47 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-06-11  8:36 [PATCH v1] fsck.erofs: implement thread-safe global LRU metadata cache Nithurshen
2026-06-11  9:15 ` Gao Xiang
2026-06-11 18:46 ` [PATCH v2] " Nithurshen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.