public inbox for linux-erofs@ozlabs.org
 help / color / mirror / Atom feed
* [PATCH 1/2 RESEND] erofs-utils: mount: generalize nbd source types for multi-backend support
@ 2026-03-30 12:44 Yifan Zhao
  2026-03-30 12:44 ` [PATCH 2/2] erofs-utils: mount: add fanotify pre-content OCI backend Yifan Zhao
  0 siblings, 1 reply; 5+ messages in thread
From: Yifan Zhao @ 2026-03-30 12:44 UTC (permalink / raw)
  To: linux-erofs; +Cc: hsiangkao, jingrui, zhukeqian1, zhaoyifan28, hudsonzhu

From: Chengyu Zhu <hudsonzhu@tencent.com>

Rename nbd-specific source type names to generic mount-level names in
preparation for adding ublk backend support.

Signed-off-by: Chengyu Zhu <hudsonzhu@tencent.com>
---
Note: This is an earlier patch authored by Chengyu Zhu, resent here as part of this series.

 mount/main.c | 52 ++++++++++++++++++++++++++--------------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/mount/main.c b/mount/main.c
index 3ef4e9c..350738d 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -71,18 +71,18 @@ static struct erofsmount_cfg {
 	.fstype = "erofs",
 };
 
-enum erofs_nbd_source_type {
-	EROFSNBD_SOURCE_LOCAL,
-	EROFSNBD_SOURCE_OCI,
+enum erofsmount_source_type {
+	EROFSMOUNT_SOURCE_LOCAL,
+	EROFSMOUNT_SOURCE_OCI,
 };
 
-static struct erofs_nbd_source {
-	enum erofs_nbd_source_type type;
+static struct erofsmount_source {
+	enum erofsmount_source_type type;
 	union {
 		const char *device_path;
 		struct ocierofs_config ocicfg;
 	};
-} nbdsrc;
+} mountsrc;
 
 static void usage(int argc, char **argv)
 {
@@ -122,7 +122,7 @@ static void version(void)
 #ifdef OCIEROFS_ENABLED
 static int erofsmount_parse_oci_option(const char *option)
 {
-	struct ocierofs_config *oci_cfg = &nbdsrc.ocicfg;
+	struct ocierofs_config *oci_cfg = &mountsrc.ocicfg;
 	const char *p;
 	long idx;
 
@@ -229,12 +229,12 @@ static long erofsmount_parse_flagopts(char *s, long flags, char **more)
 		if (!strcmp(s, "loop")) {
 			mountcfg.force_loopdev = true;
 		} else if (strncmp(s, "oci", 3) == 0) {
-			/* Initialize ocicfg here iff != EROFSNBD_SOURCE_OCI */
-			if (nbdsrc.type != EROFSNBD_SOURCE_OCI) {
+			/* Initialize ocicfg here iff != EROFSMOUNT_SOURCE_OCI */
+			if (mountsrc.type != EROFSMOUNT_SOURCE_OCI) {
 				erofs_warn("EXPERIMENTAL OCI mount support in use, use at your own risk.");
 				erofs_warn("Note that runtime performance is still unoptimized.");
-				nbdsrc.type = EROFSNBD_SOURCE_OCI;
-				nbdsrc.ocicfg.layer_index = -1;
+				mountsrc.type = EROFSMOUNT_SOURCE_OCI;
+				mountsrc.ocicfg.layer_index = -1;
 			}
 			err = erofsmount_parse_oci_option(s);
 			if (err < 0)
@@ -288,7 +288,7 @@ static int erofsmount_parse_options(int argc, char **argv)
 	int opt;
 	int i;
 
-	nbdsrc.ocicfg.layer_index = -1;
+	mountsrc.ocicfg.layer_index = -1;
 
 	while ((opt = getopt_long(argc, argv, "VNfhd:no:st:uv",
 				  long_options, NULL)) != -1) {
@@ -664,14 +664,14 @@ out:
 	return (void *)(uintptr_t)err;
 }
 
-static int erofsmount_startnbd(int nbdfd, struct erofs_nbd_source *source)
+static int erofsmount_startnbd(int nbdfd, struct erofsmount_source *source)
 {
 	struct erofsmount_nbd_ctx ctx = {};
 	uintptr_t retcode;
 	pthread_t th;
 	int err, err2;
 
-	if (source->type == EROFSNBD_SOURCE_OCI) {
+	if (source->type == EROFSMOUNT_SOURCE_OCI) {
 		if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
 			err = erofsmount_tarindex_open(&ctx.vd, &source->ocicfg,
 						       source->ocicfg.tarindex_path,
@@ -720,7 +720,7 @@ out_closefd:
 }
 
 #ifdef OCIEROFS_ENABLED
-static int erofsmount_write_recovery_oci(FILE *f, struct erofs_nbd_source *source)
+static int erofsmount_write_recovery_oci(FILE *f, struct erofsmount_source *source)
 {
 	char *b64cred = NULL;
 	const char *platform;
@@ -774,13 +774,13 @@ static int erofsmount_write_recovery_oci(FILE *f, struct erofs_nbd_source *sourc
 	return -EINVAL;
 }
 #else
-static int erofsmount_write_recovery_oci(FILE *f, struct erofs_nbd_source *source)
+static int erofsmount_write_recovery_oci(FILE *f, struct erofsmount_source *source)
 {
 	return -EOPNOTSUPP;
 }
 #endif
 
-static int erofsmount_write_recovery_local(FILE *f, struct erofs_nbd_source *source)
+static int erofsmount_write_recovery_local(FILE *f, struct erofsmount_source *source)
 {
 	char *realp;
 	int err;
@@ -795,7 +795,7 @@ static int erofsmount_write_recovery_local(FILE *f, struct erofs_nbd_source *sou
 	return err ? -ENOMEM : 0;
 }
 
-static char *erofsmount_write_recovery_info(struct erofs_nbd_source *source)
+static char *erofsmount_write_recovery_info(struct erofsmount_source *source)
 {
 	char recp[] = "/var/run/erofs/mountnbd_XXXXXX";
 	int fd, err;
@@ -817,7 +817,7 @@ static char *erofsmount_write_recovery_info(struct erofs_nbd_source *source)
 		return ERR_PTR(-errno);
 	}
 
-	if (source->type == EROFSNBD_SOURCE_OCI)
+	if (source->type == EROFSMOUNT_SOURCE_OCI)
 		err = erofsmount_write_recovery_oci(f, source);
 	else
 		err = erofsmount_write_recovery_local(f, source);
@@ -1026,7 +1026,7 @@ static int erofsmount_nbd_fix_backend_linkage(int num, char **recp)
 	return 0;
 }
 
-static int erofsmount_startnbd_nl(pid_t *pid, struct erofs_nbd_source *source)
+static int erofsmount_startnbd_nl(pid_t *pid, struct erofsmount_source *source)
 {
 	int pipefd[2], err, num;
 
@@ -1042,7 +1042,7 @@ static int erofsmount_startnbd_nl(pid_t *pid, struct erofs_nbd_source *source)
 		if (signal(SIGPIPE, SIG_IGN) == SIG_ERR)
 			exit(EXIT_FAILURE);
 
-		if (source->type == EROFSNBD_SOURCE_OCI) {
+		if (source->type == EROFSMOUNT_SOURCE_OCI) {
 			if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
 				err = erofsmount_tarindex_open(&ctx.vd, &source->ocicfg,
 							       source->ocicfg.tarindex_path,
@@ -1201,7 +1201,7 @@ err_identifier:
 	return err;
 }
 
-static int erofsmount_nbd(struct erofs_nbd_source *source,
+static int erofsmount_nbd(struct erofsmount_source *source,
 			  const char *mountpoint, const char *fstype,
 			  int flags, const char *options)
 {
@@ -1524,11 +1524,11 @@ int main(int argc, char *argv[])
 	}
 
 	if (mountcfg.backend == EROFSNBD) {
-		if (nbdsrc.type == EROFSNBD_SOURCE_OCI)
-			nbdsrc.ocicfg.image_ref = mountcfg.device;
+		if (mountsrc.type == EROFSMOUNT_SOURCE_OCI)
+			mountsrc.ocicfg.image_ref = mountcfg.device;
 		else
-			nbdsrc.device_path = mountcfg.device;
-		err = erofsmount_nbd(&nbdsrc, mountcfg.target,
+			mountsrc.device_path = mountcfg.device;
+		err = erofsmount_nbd(&mountsrc, mountcfg.target,
 				     mountcfg.fstype, mountcfg.flags, mountcfg.options);
 		goto exit;
 	}
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] erofs-utils: mount: add fanotify pre-content OCI backend
  2026-03-30 12:44 [PATCH 1/2 RESEND] erofs-utils: mount: generalize nbd source types for multi-backend support Yifan Zhao
@ 2026-03-30 12:44 ` Yifan Zhao
  2026-03-31  1:53   ` Gao Xiang
  2026-03-31 13:14   ` [PATCH v2 " Yifan Zhao
  0 siblings, 2 replies; 5+ messages in thread
From: Yifan Zhao @ 2026-03-30 12:44 UTC (permalink / raw)
  To: linux-erofs; +Cc: hsiangkao, jingrui, zhukeqian1, zhaoyifan28, hudsonzhu

From: Yifan Zhao <yifan.yfzhao@foxmail.com>

Add a fanotify-backed mount mode for OCI sources that uses
FAN_PRE_ACCESS permission events to populate a local sparse file
on demand before the kernel consumes the requested data.

The new erofs.fanotify subtype resolves a single OCI blob,
creates a sparse cache file, and runs a fanotify event loop
that fetches missing ranges before allowing access to proceed.

A per-worker state file recording the worker PID, the canonical
mountpoint and the sparse-file source is written so that unmount can
find and stop the corresponding worker.

Signed-off-by: Yifan Zhao <zhaoyifan28@huawei.com>
---
 configure.ac            |  28 ++
 lib/Makefile.am         |   7 +
 lib/backends/fanotify.c | 110 +++++++
 lib/liberofs_fanotify.h |  49 +++
 lib/liberofs_oci.h      |   3 +
 lib/remotes/oci.c       |  10 +-
 mount/main.c            | 671 +++++++++++++++++++++++++++++++++++++++-
 7 files changed, 872 insertions(+), 6 deletions(-)
 create mode 100644 lib/backends/fanotify.c
 create mode 100644 lib/liberofs_fanotify.h

diff --git a/configure.ac b/configure.ac
index 8a8e9b3..45b8190 100644
--- a/configure.ac
+++ b/configure.ac
@@ -194,6 +194,10 @@ AC_ARG_ENABLE(oci,
                    [enable OCI registry based input support @<:@default=no@:>@]),
     [enable_oci="$enableval"],[enable_oci="no"])
 
+AC_ARG_ENABLE(fanotify,
+   [AS_HELP_STRING([--enable-fanotify], [enable fanotify pre-content backend @<:@default=no@:>@])],
+   [enable_fanotify="$enableval"], [enable_fanotify="no"])
+
 AC_ARG_ENABLE(fuse,
    [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
    [enable_fuse="$enableval"], [enable_fuse="no"])
@@ -651,6 +655,24 @@ AS_IF([test "x$enable_oci" = "xyes"], [
   ])
 ], [have_oci="no"])
 
+have_fanotify="no"
+AS_IF([test "x$enable_fanotify" = "xyes"], [
+  AS_IF([test "x$build_linux" != "xyes"], [
+    AC_MSG_ERROR([fanotify backend requires Linux])
+  ])
+  AS_IF([test "x$have_oci" != "xyes"], [
+    AC_MSG_ERROR([fanotify backend requires --enable-oci])
+  ])
+  AC_CHECK_HEADERS([sys/fanotify.h], [
+    have_fanotify="yes"
+    AC_CHECK_TYPES([struct fanotify_event_info_range], [], [], [[
+#include <sys/fanotify.h>
+    ]])
+  ], [
+    AC_MSG_ERROR([fanotify backend disabled: missing sys/fanotify.h])
+  ])
+])
+
 # Configure openssl
 have_openssl="no"
 AS_IF([test "x$with_openssl" != "xno"], [
@@ -766,6 +788,7 @@ AM_CONDITIONAL([ENABLE_LIBXML2], [test "x${have_libxml2}" = "xyes"])
 AM_CONDITIONAL([ENABLE_S3], [test "x${have_s3}" = "xyes"])
 AM_CONDITIONAL([ENABLE_STATIC_FUSE], [test "x${enable_static_fuse}" = "xyes"])
 AM_CONDITIONAL([ENABLE_OCI], [test "x${have_oci}" = "xyes"])
+AM_CONDITIONAL([ENABLE_FANOTIFY], [test "x${have_fanotify}" = "xyes"])
 
 if test "x$have_uuid" = "xyes"; then
   AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -842,6 +865,11 @@ if test "x$have_oci" = "xyes"; then
   AC_DEFINE([OCIEROFS_ENABLED], 1, [Define to 1 if OCI registry is enabled])
 fi
 
+if test "x$have_fanotify" = "xyes"; then
+  AC_DEFINE([EROFS_FANOTIFY_ENABLED], 1,
+	    [Define to 1 if fanotify backend is enabled])
+fi
+
 # Dump maximum block size
 AS_IF([test "x$erofs_cv_max_block_size" = "x"],
       [$erofs_cv_max_block_size = 4096], [])
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 77f6fd8..5f8812f 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -36,6 +36,10 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/lib/liberofs_s3.h
 
 noinst_HEADERS += compressor.h
+if ENABLE_FANOTIFY
+noinst_HEADERS += $(top_srcdir)/lib/liberofs_fanotify.h
+endif
+
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
 		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
@@ -88,6 +92,9 @@ if OS_LINUX
 liberofs_la_CFLAGS += ${libnl3_CFLAGS}
 liberofs_la_LDFLAGS += ${libnl3_LIBS}
 liberofs_la_SOURCES += backends/nbd.c
+if ENABLE_FANOTIFY
+liberofs_la_SOURCES += backends/fanotify.c
+endif
 endif
 liberofs_la_SOURCES += remotes/oci.c remotes/docker_config.c
 liberofs_la_CFLAGS += ${json_c_CFLAGS}
diff --git a/lib/backends/fanotify.c b/lib/backends/fanotify.c
new file mode 100644
index 0000000..66a97a1
--- /dev/null
+++ b/lib/backends/fanotify.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include "erofs/print.h"
+#include "liberofs_fanotify.h"
+
+/* Create a non-blocking pre-content fanotify group; returns its fd or -errno. */
+int erofs_fanotify_init_precontent(void)
+{
+	int fan_fd, err;
+
+	fan_fd = fanotify_init(FAN_CLASS_PRE_CONTENT | FAN_CLOEXEC | FAN_NONBLOCK,
+			       O_RDONLY | O_LARGEFILE);
+	if (fan_fd >= 0)
+		return fan_fd;
+	err = -errno;	/* save first: logging may clobber errno */
+	erofs_err("fanotify_init failed: %s", strerror(-err));
+	return err;
+}
+
+/* Mark @path for FAN_PRE_ACCESS events on @fan_fd; returns 0 or -errno. */
+int erofs_fanotify_mark_file(int fan_fd, const char *path)
+{
+	int err;
+
+	if (fanotify_mark(fan_fd, FAN_MARK_ADD, FAN_PRE_ACCESS, AT_FDCWD, path) < 0) {
+		err = -errno;	/* save first: logging may clobber errno */
+		erofs_err("fanotify_mark failed for %s: %s", path, strerror(-err));
+		return err;
+	}
+	erofs_dbg("Marked %s for FAN_PRE_ACCESS monitoring", path);
+	return 0;
+}
+
+/* Copy the FAN_EVENT_INFO_TYPE_RANGE record of @meta, if any, into @range
+ * (zeroed when absent).  Returns 0, or -errno for a malformed event. */
+int erofs_fanotify_parse_range_event(const struct fanotify_event_metadata *meta,
+				     struct erofs_fanotify_range *range)
+{
+	const struct fanotify_event_info_header *info_hdr;
+	const struct fanotify_event_info_range *range_info;
+	const char *ptr, *end;
+
+	if (meta->metadata_len > meta->event_len) {
+		erofs_err("Invalid fanotify metadata length");
+		return -EIO;
+	}
+
+	if (meta->vers != FANOTIFY_METADATA_VERSION) {
+		erofs_err("Unsupported fanotify metadata version %d", meta->vers);
+		return -EINVAL;
+	}
+
+	/* offset = count = 0 means "no range info"; callers decide what to do */
+	range->offset = 0;
+	range->count = 0;
+
+	/* Info records, if any, follow the fixed-size metadata header */
+	ptr = (const char *)meta + meta->metadata_len;
+	end = (const char *)meta + meta->event_len;
+
+	while (ptr < end) {
+		size_t avail = end - ptr, info_len;
+
+		if (avail < sizeof(*info_hdr)) {
+			erofs_err("Incomplete fanotify event info header");
+			return -EIO;
+		}
+		info_hdr = (const struct fanotify_event_info_header *)ptr;
+		info_len = info_hdr->len;
+		/* compare lengths rather than pointers so the check cannot wrap */
+		if (info_len < sizeof(*info_hdr) || info_len > avail) {
+			erofs_err("Invalid fanotify event info length");
+			return -EIO;
+		}
+		if (info_hdr->info_type == FAN_EVENT_INFO_TYPE_RANGE) {
+			if (info_len < sizeof(*range_info)) {
+				erofs_err("Incomplete fanotify range info");
+				return -EIO;
+			}
+			range_info = (const struct fanotify_event_info_range *)ptr;
+			range->offset = range_info->offset;
+			range->count = range_info->count;
+			break;
+		}
+		ptr += info_len;
+	}
+	return 0;
+}
+
+/* Write an allow/deny verdict for @event_fd to @fan_fd; returns 0 or -errno. */
+int erofs_fanotify_respond(int fan_fd, int event_fd, bool allow)
+{
+	struct fanotify_response response = {
+		.fd = event_fd,
+		.response = allow ? FAN_ALLOW : FAN_DENY,
+	};
+	ssize_t ret = write(fan_fd, &response, sizeof(response));
+	int err;
+
+	if (ret == sizeof(response))
+		return 0;
+	err = ret < 0 ? -errno : -EIO;	/* save first: logging may clobber errno */
+	erofs_err("Failed to respond to fanotify event: %s",
+		  ret < 0 ? strerror(-err) : "short write");
+	return err;
+}
diff --git a/lib/liberofs_fanotify.h b/lib/liberofs_fanotify.h
new file mode 100644
index 0000000..a22b7ee
--- /dev/null
+++ b/lib/liberofs_fanotify.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_LIB_LIBEROFS_FANOTIFY_H
+#define __EROFS_LIB_LIBEROFS_FANOTIFY_H
+
+#include "erofs/defs.h"
+#include <sys/fanotify.h>
+
+/* FAN_PRE_ACCESS may not be defined in older headers */
+#ifndef FAN_PRE_ACCESS
+#define FAN_PRE_ACCESS 0x00100000
+#endif
+
+#ifndef FAN_CLASS_PRE_CONTENT
+#define FAN_CLASS_PRE_CONTENT 0x00000008
+#endif
+
+#ifndef FAN_EVENT_INFO_TYPE_RANGE
+#define FAN_EVENT_INFO_TYPE_RANGE 6
+#endif
+
+/* Fallback for headers lacking it; configure's AC_CHECK_TYPES sets the macro */
+#ifndef HAVE_STRUCT_FANOTIFY_EVENT_INFO_RANGE
+struct fanotify_event_info_range {
+	struct fanotify_event_info_header hdr;
+	__u32 pad;
+	__u64 offset;
+	__u64 count;
+};
+#endif
+
+struct erofs_fanotify_range {
+	u64 offset;	/* copied from fanotify_event_info_range.offset */
+	u64 count;	/* copied from .count; 0 when the event had no range */
+};
+
+/* Initialize fanotify with FAN_CLASS_PRE_CONTENT */
+int erofs_fanotify_init_precontent(void);
+
+/* Mark file for FAN_PRE_ACCESS monitoring */
+int erofs_fanotify_mark_file(int fan_fd, const char *path);
+
+/* Parse a single fanotify event and extract range information */
+int erofs_fanotify_parse_range_event(const struct fanotify_event_metadata *meta,
+				     struct erofs_fanotify_range *range);
+
+/* Respond to fanotify permission event */
+int erofs_fanotify_respond(int fan_fd, int event_fd, bool allow);
+
+#endif
diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
index 2243c82..3b3d66d 100644
--- a/lib/liberofs_oci.h
+++ b/lib/liberofs_oci.h
@@ -76,6 +76,9 @@ struct ocierofs_iostream {
  */
 int ocierofs_build_trees(struct erofs_importer *importer,
 			 const struct ocierofs_config *cfg);
+int ocierofs_ctx_init(struct ocierofs_ctx *ctx,
+		      const struct ocierofs_config *cfg);
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx);
 int ocierofs_io_open(struct erofs_vfile *vf, const struct ocierofs_config *cfg);
 
 char *ocierofs_encode_userpass(const char *username, const char *password);
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
index 47e8b27..f96be13 100644
--- a/lib/remotes/oci.c
+++ b/lib/remotes/oci.c
@@ -1144,7 +1144,7 @@ const char *ocierofs_get_platform_spec(void)
 }
 
 /**
- * ocierofs_init - Initialize OCI context
+ * ocierofs_ctx_init - Initialize OCI context
  * @ctx: OCI context structure to initialize
  * @config: OCI configuration
  *
@@ -1154,7 +1154,7 @@ const char *ocierofs_get_platform_spec(void)
  *
  * Return: 0 on success, negative errno on failure
  */
-static int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config)
+int ocierofs_ctx_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config)
 {
 	int ret;
 
@@ -1288,7 +1288,7 @@ out:
  * Clean up CURL handle, free all allocated string parameters, and
  * reset the OCI context structure to a clean state.
  */
-static void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
 {
 	if (!ctx)
 		return;
@@ -1316,7 +1316,7 @@ int ocierofs_build_trees(struct erofs_importer *importer,
 	int ret, i, end, fd;
 	u64 tar_offset = 0;
 
-	ret = ocierofs_init(&ctx, config);
+	ret = ocierofs_ctx_init(&ctx, config);
 	if (ret) {
 		ocierofs_ctx_cleanup(&ctx);
 		return ret;
@@ -1529,7 +1529,7 @@ int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cf
 	if (!ctx)
 		return -ENOMEM;
 
-	err = ocierofs_init(ctx, cfg);
+	err = ocierofs_ctx_init(ctx, cfg);
 	if (err)
 		goto out;
 
diff --git a/mount/main.c b/mount/main.c
index 350738d..e961937 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 #define _GNU_SOURCE
+#include <dirent.h>
 #include <fcntl.h>
 #include <getopt.h>
 #include <stdio.h>
@@ -11,6 +12,7 @@
 #include <sys/wait.h>
 #include <pthread.h>
 #include <unistd.h>
+#include <poll.h>
 #include "erofs/config.h"
 #include "erofs/print.h"
 #include "erofs/err.h"
@@ -18,6 +20,9 @@
 #include "../lib/liberofs_nbd.h"
 #include "../lib/liberofs_oci.h"
 #include "../lib/liberofs_gzran.h"
+#ifdef EROFS_FANOTIFY_ENABLED
+#include "../lib/liberofs_fanotify.h"
+#endif
 
 #ifdef HAVE_LINUX_LOOP_H
 #include <linux/loop.h>
@@ -40,12 +45,22 @@ struct loop_info {
 
 /* Device boundary probe */
 #define EROFSMOUNT_NBD_DISK_SIZE	(INT64_MAX >> 9)
+#define EROFSMOUNT_CACHE_DIR	"/var/cache/erofs"
+#define EROFSMOUNT_RUNTIME_DIR	"/run/erofs"
+#define EROFSMOUNT_FANOTIFY_STATE_DIR	EROFSMOUNT_RUNTIME_DIR "/fanotify"
+
+#ifdef EROFS_FANOTIFY_ENABLED
+#define EROFSMOUNT_FANOTIFY_HELP	", fanotify"
+#else
+#define EROFSMOUNT_FANOTIFY_HELP	""
+#endif
 
 enum erofs_backend_drv {
 	EROFSAUTO,
 	EROFSLOCAL,
 	EROFSFUSE,
 	EROFSNBD,
+	EROFSFANOTIFY,
 };
 
 enum erofsmount_mode {
@@ -95,7 +110,7 @@ static void usage(int argc, char **argv)
 		" -d <0-9>              set output verbosity; 0=quiet, 9=verbose (default=%i)\n"
 		" -o options            comma-separated list of mount options\n"
 		" -t type[.subtype]     filesystem type (and optional subtype)\n"
-		"                       subtypes: fuse, local, nbd\n"
+		"                       subtypes: fuse, local, nbd" EROFSMOUNT_FANOTIFY_HELP "\n"
 		" -u                    unmount the filesystem\n"
 		"    --disconnect       abort an existing NBD device forcibly\n"
 		"    --reattach         reattach to an existing NBD device\n"
@@ -324,6 +339,13 @@ static int erofsmount_parse_options(int argc, char **argv)
 					mountcfg.backend = EROFSLOCAL;
 				} else if (!strcmp(dot + 1, "nbd")) {
 					mountcfg.backend = EROFSNBD;
+				} else if (!strcmp(dot + 1, "fanotify")) {
+#ifdef EROFS_FANOTIFY_ENABLED
+					mountcfg.backend = EROFSFANOTIFY;
+#else
+					erofs_err("fanotify backend is not enabled at build time");
+					return -EINVAL;
+#endif
 				} else {
 					erofs_err("invalid filesystem subtype `%s`", dot + 1);
 					return -EINVAL;
@@ -1342,6 +1364,629 @@ out_err:
 	return -errno;
 }
 
+#ifdef EROFS_FANOTIFY_ENABLED
+struct erofsmount_fanotify_state {
+	pid_t pid;		/* first line of the state file */
+	char *mountpoint;	/* second line; heap-allocated when read back */
+	char *source;		/* third line; heap-allocated when read back */
+};
+
+/* Free both strings owned by @state and leave the pointers NULL. */
+static void erofsmount_free_fanotify_state(struct erofsmount_fanotify_state *state)
+{
+	free(state->source);
+	free(state->mountpoint);
+	state->source = state->mountpoint = NULL;
+}
+
+/*
+ * Publish "<pid>\n<mountpoint>\n<source>\n" at @state_path.
+ * The text goes to a mkstemp() sibling of @state_path that is flushed,
+ * fsync()ed and closed before being rename()d over the final name; on any
+ * failure after mkstemp() the temporary name is unlinked again.
+ * Returns 0 or a negative errno.
+ */
+static int erofsmount_write_fanotify_state(const char *state_path, pid_t pid,
+					   const char *mountpoint,
+					   const char *source)
+{
+	char *tmp = NULL;
+	int tmpfd, err;
+	FILE *fp;
+
+	/* both directory levels may already exist from an earlier mount */
+	if (mkdir(EROFSMOUNT_RUNTIME_DIR, 0700) < 0 && errno != EEXIST)
+		return -errno;
+	if (mkdir(EROFSMOUNT_FANOTIFY_STATE_DIR, 0700) < 0 && errno != EEXIST)
+		return -errno;
+
+	if (asprintf(&tmp, "%s.tmpXXXXXX", state_path) < 0)
+		return -ENOMEM;
+
+	tmpfd = mkstemp(tmp);
+	if (tmpfd < 0) {
+		err = -errno;
+		goto out_unlink;
+	}
+
+	fp = fdopen(tmpfd, "w");
+	if (!fp) {
+		/* no stream took ownership, so the raw fd is closed by hand */
+		err = -errno;
+		close(tmpfd);
+		goto out_unlink;
+	}
+
+	if (fprintf(fp, "%d\n%s\n%s\n", pid, mountpoint, source) < 0 ||
+	    fflush(fp) == EOF) {
+		err = errno ? -errno : -EIO;
+		goto out_close;
+	}
+	if (fsync(fileno(fp)) < 0) {
+		err = -errno;
+		goto out_close;
+	}
+
+	/* fclose() releases the stream even when it reports an error */
+	if (fclose(fp) < 0) {
+		err = -errno;
+		goto out_unlink;
+	}
+
+	if (rename(tmp, state_path) < 0) {
+		err = -errno;
+		goto out_unlink;
+	}
+
+	free(tmp);
+	return 0;
+
+out_close:
+	fclose(fp);
+out_unlink:
+	unlink(tmp);
+	free(tmp);
+	return err;
+}
+
+/*
+ * Parse @state_path ("<pid>\n<mountpoint>\n<source>\n") into @state.
+ * On success the caller owns both strings; on failure @state holds none.
+ */
+static int erofsmount_read_fanotify_state(const char *state_path,
+					  struct erofsmount_fanotify_state *state)
+{
+	size_t mlen = 0, slen = 0;	/* one capacity per getline() buffer */
+	int err = 0;
+	FILE *f;
+
+	memset(state, 0, sizeof(*state));
+	f = fopen(state_path, "r");
+	if (!f)
+		return -errno;
+	/* pid <= 0 is rejected: kill() would address a group or every process */
+	if (fscanf(f, "%d", &state->pid) != 1 || state->pid <= 0 ||
+	    fgetc(f) != '\n')
+		err = -EINVAL;
+	else if (getline(&state->mountpoint, &mlen, f) < 0 ||
+		 getline(&state->source, &slen, f) < 0)
+		err = feof(f) ? -EINVAL : -errno;
+	fclose(f);
+	if (err) {
+		erofsmount_free_fanotify_state(state);
+		return err;
+	}
+	state->mountpoint[strcspn(state->mountpoint, "\n")] = '\0';
+	state->source[strcspn(state->source, "\n")] = '\0';
+	return 0;
+}
+
+/*
+ * Stop the fanotify worker recorded for (@mountpoint, @source), if any: scan
+ * EROFSMOUNT_FANOTIFY_STATE_DIR for "*.state" files, SIGTERM the first match
+ * and remove its state file.  Returns 0 (also when nothing matches) or -errno.
+ */
+static int erofsmount_cleanup_fanotify_worker(const char *mountpoint,
+					      const char *source)
+{
+	static const char suffix[] = ".state";
+	struct dirent *de;
+	int err = 0;
+	DIR *dir;
+
+	dir = opendir(EROFSMOUNT_FANOTIFY_STATE_DIR);
+	if (!dir)
+		return errno == ENOENT ? 0 : -errno;
+	while (!err && (de = readdir(dir)) != NULL) {
+		struct erofsmount_fanotify_state state;
+		size_t len = strlen(de->d_name);
+		char *state_path;
+		bool match;
+
+		/* exact suffix only: "<pid>.state.tmpXXXXXX" is still being written */
+		if (len < sizeof(suffix) ||
+		    strcmp(de->d_name + len - (sizeof(suffix) - 1), suffix))
+			continue;
+		if (asprintf(&state_path, "%s/%s", EROFSMOUNT_FANOTIFY_STATE_DIR,
+			     de->d_name) < 0) {
+			err = -ENOMEM;
+			break;
+		}
+		err = erofsmount_read_fanotify_state(state_path, &state);
+		if (err) {
+			/* a file that vanished meanwhile is skipped, not an error */
+			if (err == -ENOENT)
+				err = 0;
+			free(state_path);
+			continue;
+		}
+		match = !strcmp(state.mountpoint, mountpoint) &&
+			!strcmp(state.source, source);
+		if (match) {
+			/* pid <= 0 would make kill() signal a group or everyone */
+			if (state.pid <= 0)
+				err = -EINVAL;
+			else if (kill(state.pid, SIGTERM) < 0 && errno != ESRCH)
+				err = -errno;
+			else if (unlink(state_path) < 0 && errno != ENOENT)
+				err = -errno;
+		}
+		erofsmount_free_fanotify_state(&state);
+		free(state_path);
+		if (match)
+			break;
+	}
+	closedir(dir);
+	return err;
+}
+
+struct erofsmount_fanotify_ctx {
+	struct erofs_vfile vd;		/* OCI blob opened via ocierofs_io_open() */
+	int sparse_fd;			/* sparse cache file, -1 while unset */
+	int fan_fd;			/* fanotify group fd, -1 while unset */
+	char *sparse_path;		/* heap copy of the sparse file's path */
+	u64 image_size;			/* blob size = sparse file size, bytes */
+};
+
+/*
+ * Open (creating if needed) the sparse cache file named after @blob_digest,
+ * size it to @size and record it in @ctx (untouched on failure).
+ */
+static int erofsmount_create_sparse_file(struct erofsmount_fanotify_ctx *ctx,
+					 u64 size, const char *blob_digest)
+{
+	char filepath[PATH_MAX];
+	const char *hex_digest;
+	char *path;
+	int fd, err;
+
+	/* Extract hex part from "sha256:xxxx..." */
+	if (!blob_digest || strncmp(blob_digest, "sha256:", 7) != 0)
+		return -EINVAL;
+	hex_digest = blob_digest + 7;
+	/* the digest becomes a file name: allow hex only, so no '/' or ".." */
+	if (!*hex_digest ||
+	    hex_digest[strspn(hex_digest, "0123456789abcdefABCDEF")] != '\0')
+		return -EINVAL;
+
+	err = snprintf(filepath, sizeof(filepath), EROFSMOUNT_CACHE_DIR "/%s",
+		       hex_digest);
+	if (err < 0 || (size_t)err >= sizeof(filepath))
+		return -ENAMETOOLONG;
+
+	/* Try to open existing file or create new one */
+	fd = open(filepath, O_RDWR | O_CREAT, 0600);
+	if (fd < 0 && errno == ENOENT) {
+		if (mkdir(EROFSMOUNT_CACHE_DIR, 0700) < 0 && errno != EEXIST)
+			return -errno;
+		fd = open(filepath, O_RDWR | O_CREAT, 0600);
+	}
+	if (fd < 0)
+		return -errno;
+
+	/* Set file size (creates sparse file).  The file is kept on failure:
+	 * it may have existed before and already hold fetched data. */
+	path = strdup(filepath);
+	if (!path || ftruncate(fd, size) < 0) {
+		err = path ? -errno : -ENOMEM;
+		free(path);
+		close(fd);
+		return err;
+	}
+
+	ctx->sparse_path = path;
+	ctx->sparse_fd = fd;
+	ctx->image_size = size;
+
+	erofs_dbg("Created local sparse file %s (size: %llu bytes)",
+		  ctx->sparse_path, (unsigned long long)size);
+	return 0;
+}
+
+/*
+ * Tell whether [offset, offset + length) of @fd is entirely backed by data:
+ * data must begin exactly at @offset and the next hole must not start before
+ * the end of the range.  lseek() failures count as "not present".
+ */
+static bool erofsmount_range_in_sparse(int fd, u64 offset, size_t length)
+{
+	off_t pos;
+
+	/*
+	 * SEEK_DATA fails (e.g. ENXIO when nothing but a hole follows) or lands
+	 * past @offset whenever the range starts inside a hole.
+	 */
+	pos = lseek(fd, offset, SEEK_DATA);
+	if (pos < 0 || (u64)pos != offset)
+		return false;
+
+	/* Locate the first hole at or after @offset */
+	pos = lseek(fd, offset, SEEK_HOLE);
+	if (pos < 0)
+		return false;
+
+	/*
+	 * Present only if that hole lies at or beyond the end of the range.
+	 */
+	return (u64)pos >= offset + length;
+}
+
+/* Select the one OCI blob to mount.  On success *digest is a strdup()ed copy
+ * (caller frees) and *image_size that layer's size; returns 0 or -errno. */
+static int erofsmount_resolve_fanotify_blob(const struct ocierofs_config *oci_cfg,
+					    char **digest, u64 *image_size)
+{
+	struct ocierofs_ctx oci_ctx = {};
+	int err, i;
+
+	err = ocierofs_ctx_init(&oci_ctx, oci_cfg);
+	if (err)	/* release partial state, as ocierofs_build_trees() does */
+		goto out;
+
+	if (oci_ctx.blob_digest) {
+		for (i = 0; i < oci_ctx.layer_count; ++i) {
+			if (!strcmp(oci_ctx.layers[i]->digest, oci_ctx.blob_digest))
+				break;
+		}
+		if (i >= oci_ctx.layer_count) {
+			err = -ENOENT;
+			goto out;
+		}
+	} else if (oci_ctx.layer_count == 1) {
+		i = 0;
+	} else {
+		erofs_err("fanotify backend requires exactly one OCI blob; use oci.blob= or oci.layer=");
+		err = -EINVAL;
+		goto out;
+	}
+
+	*digest = strdup(oci_ctx.layers[i]->digest);
+	if (!*digest) {
+		err = -ENOMEM;
+		goto out;
+	}
+	*image_size = oci_ctx.layers[i]->size;
+out:
+	ocierofs_ctx_cleanup(&oci_ctx);
+	return err;
+}
+
+/*
+ * Serve one event: fill the requested range of the sparse file from the OCI
+ * source if needed, then answer the request (deny on failure) and close its fd.
+ */
+static int erofs_fanotify_handle_event(struct erofsmount_fanotify_ctx *ctx,
+				       struct fanotify_event_metadata *meta,
+				       void **fetch_buf, size_t *fetch_buf_size)
+{
+	struct erofs_fanotify_range range;
+	bool allow_access = true;
+	size_t length, done = 0;
+	ssize_t read_len, written;
+	int err, resp_err;
+	u64 offset;
+
+	err = erofs_fanotify_parse_range_event(meta, &range);
+	if (err < 0) {
+		erofs_err("Failed to parse fanotify event: %s",
+			  erofs_strerror(err));
+		allow_access = false;
+		goto response;
+	}
+
+	offset = range.offset;
+	if (!(meta->mask & FAN_PRE_ACCESS) || offset >= ctx->image_size)
+		goto response;
+
+	/*
+	 * Clamp to the bytes left in the image; offset + count could wrap.
+	 * An event without range info (count == 0) fetches up to 1 MiB.
+	 */
+	length = min_t(u64, range.count ? range.count : 1024 * 1024,
+		       ctx->image_size - offset);
+
+	/* Check if data already exists locally in sparse file */
+	if (erofsmount_range_in_sparse(ctx->sparse_fd, offset, length)) {
+		erofs_dbg("Range [%llu, %llu) already local, skipping fetch",
+			  (unsigned long long)offset,
+			  (unsigned long long)(offset + length));
+		goto response;
+	}
+
+	if (*fetch_buf_size < length) {
+		void *newbuf = realloc(*fetch_buf, length);
+
+		if (!newbuf) {
+			erofs_err("Failed to allocate %zu bytes", length);
+			err = -ENOMEM;
+			allow_access = false;
+			goto response;
+		}
+		*fetch_buf = newbuf;
+		*fetch_buf_size = length;
+	}
+	erofs_dbg("Fetching range [%llu, %llu)", (unsigned long long)offset,
+		  (unsigned long long)(offset + length));
+
+	/* Retry after short reads so that no hole is left inside the range */
+	for (; done < length; done += read_len) {
+		read_len = erofs_io_pread(&ctx->vd, (char *)*fetch_buf + done,
+					  length - done, offset + done);
+		if (read_len > 0)
+			continue;
+		err = read_len < 0 ? read_len : -EIO;	/* 0: unexpected EOF */
+		erofs_err("Failed to fetch range [%llu, %llu): %s",
+			  (unsigned long long)offset,
+			  (unsigned long long)(offset + length),
+			  erofs_strerror(err));
+		allow_access = false;
+		goto response;
+	}
+
+	written = pwrite(ctx->sparse_fd, *fetch_buf, length, offset);
+	if (written < 0 || (size_t)written != length) {
+		err = written < 0 ? -errno : -EIO;
+		erofs_err("Failed to write to sparse file at offset %llu: %s",
+			  (unsigned long long)offset,
+			  written < 0 ? strerror(-err) : "short write");
+		allow_access = false;
+		goto response;
+	}
+
+	fsync(ctx->sparse_fd);
+
+response:
+	resp_err = erofs_fanotify_respond(ctx->fan_fd, meta->fd, allow_access);
+	if (meta->fd >= 0)
+		close(meta->fd);
+	return resp_err ? resp_err : err;
+}
+
+/* Worker main loop: read event batches from ctx->fan_fd and handle each event
+ * until an error ends the loop (EPIPE from read() counts as a clean exit). */
+static int erofsmount_fanotify_loop(struct erofsmount_fanotify_ctx *ctx)
+{
+	char event_buf[4096] __attribute__((aligned(8)));
+	struct pollfd pfd = { .fd = ctx->fan_fd, .events = POLLIN };
+	size_t fetch_buf_size = 0;
+	void *fetch_buf = NULL;
+	int err = 0;
+
+	for (;;) {
+		struct fanotify_event_metadata *meta;
+		ssize_t len, remaining;
+
+		len = read(ctx->fan_fd, event_buf, sizeof(event_buf));
+		if (!len) {
+			err = -EIO;
+			erofs_err("Unexpected EOF on fanotify fd");
+			break;
+		}
+		if (len < 0 && errno == EINTR)
+			continue;
+		if (len < 0 && errno == EAGAIN) {
+			/* nothing queued on the non-blocking fd: wait for input */
+			if (poll(&pfd, 1, -1) < 0 && errno != EINTR) {
+				err = -errno;
+				break;
+			}
+			continue;
+		}
+		if (len < 0) {
+			err = -errno;
+			if (err == -EPIPE) {
+				err = 0;
+				break;
+			}
+			erofs_err("Failed to read fanotify events: %s",
+				  erofs_strerror(err));
+			break;
+		}
+
+		/* walk every event packed into this read() */
+		remaining = len;
+		meta = (struct fanotify_event_metadata *)event_buf;
+		while (FAN_EVENT_OK(meta, remaining)) {
+			erofs_dbg("Handling fanotify event: mask=0x%llx fd=%d pid=%d",
+				  (unsigned long long)meta->mask,
+				  meta->fd, meta->pid);
+			err = erofs_fanotify_handle_event(ctx, meta, &fetch_buf,
+							  &fetch_buf_size);
+			if (err < 0)
+				break;
+			meta = FAN_EVENT_NEXT(meta, remaining);
+		}
+		if (err)
+			break;
+		/* leftover bytes did not form a complete event */
+		if (remaining) {
+			erofs_err("Invalid or incomplete fanotify event buffer");
+			err = -EIO;
+			break;
+		}
+	}
+
+	free(fetch_buf);
+	return err;
+}
+
+static void erofsmount_fanotify_ctx_cleanup(struct erofsmount_fanotify_ctx *ctx)
+{
+	if (ctx->fan_fd >= 0)		/* fds still at -1 were never opened */
+		close(ctx->fan_fd);
+	if (ctx->sparse_fd >= 0)
+		close(ctx->sparse_fd);
+	if (ctx->vd.ops || ctx->vd.fd >= 0)	/* presumably: vfile was opened */
+		erofs_io_close(&ctx->vd);
+	free(ctx->sparse_path);		/* free(NULL) is fine when never set */
+}
+
+/*
+ * Body of the forked worker: create the fanotify group, mark the sparse
+ * file, send the setup status (0 or -errno) to the parent through @pipefd,
+ * then run the event loop if setup succeeded.
+ */
+static int erofsmount_fanotify_child(struct erofsmount_fanotify_ctx *ctx,
+				     int pipefd)
+{
+	int err;
+
+	ctx->fan_fd = erofs_fanotify_init_precontent();
+	if (ctx->fan_fd < 0)
+		err = ctx->fan_fd;
+	else
+		err = erofs_fanotify_mark_file(ctx->fan_fd, ctx->sparse_path);
+
+	/* the parent blocks in read() on the other pipe end until this
+	 * status arrives (or the pipe is closed) */
+	write(pipefd, &err, sizeof(err));
+	close(pipefd);
+
+	if (!err)
+		err = erofsmount_fanotify_loop(ctx);
+	return err;
+}
+
+static int erofsmount_fanotify(struct erofsmount_source *source,
+			       const char *mountpoint, const char *fstype,
+			       int flags, const char *options)
+{
+	struct erofsmount_fanotify_ctx ctx = {
+		.vd = {.fd = -1},
+		.sparse_fd = -1,
+		.fan_fd = -1,
+	};
+	struct ocierofs_config layer_cfg;
+	char *blob_digest = NULL;
+	char *state_mountpoint = NULL;
+	char *state_path = NULL;
+	pid_t pid = -1;
+	int pipefd[2];
+	int err, child_err;
+	u64 image_size;
+	/* Mount one OCI blob through a sparse cache file fed by a forked worker */
+	if (strcmp(fstype, "erofs")) {
+		fprintf(stderr, "unsupported filesystem type `%s`\n", fstype);
+		return -ENODEV;
+	}
+	flags |= MS_RDONLY;	/* the mount is always read-only */
+
+	if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
+		erofs_err("fanotify backend does not support tarindex or zinfo");
+		return -EOPNOTSUPP;
+	}
+	/* Canonical path: this is what gets recorded in the state file */
+	state_mountpoint = realpath(mountpoint, NULL);
+	if (!state_mountpoint) {
+		err = -errno;
+		goto out;
+	}
+
+	err = erofsmount_resolve_fanotify_blob(&source->ocicfg, &blob_digest,
+					       &image_size);
+	if (err)
+		goto out;
+	/* Open the source with a copy of the config pinned to the resolved blob */
+	layer_cfg = source->ocicfg;
+	layer_cfg.blob_digest = blob_digest;
+	layer_cfg.layer_index = -1;	/* presumably: select by digest only */
+
+	err = ocierofs_io_open(&ctx.vd, &layer_cfg);
+	if (err)
+		goto out;
+
+	err = erofsmount_create_sparse_file(&ctx, image_size, blob_digest);
+	if (err)
+		goto out;
+
+	/* Pipe over which the child reports its fanotify setup status */
+	if (pipe(pipefd) < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		err = -errno;
+		close(pipefd[0]);
+		close(pipefd[1]);
+		goto out;
+	}
+	/* Child: runs the worker and exits here, never returning to the caller */
+	if (pid == 0) {
+		close(pipefd[0]);
+		err = erofsmount_fanotify_child(&ctx, pipefd[1]);
+		erofsmount_fanotify_ctx_cleanup(&ctx);
+		exit(err ? EXIT_FAILURE : EXIT_SUCCESS);
+	}
+
+	/* Parent: block until the child reports its fanotify setup status */
+	close(pipefd[1]);
+	if (read(pipefd[0], &child_err, sizeof(child_err)) != sizeof(child_err))
+		child_err = -EPIPE;	/* no complete status was received */
+	close(pipefd[0]);
+
+	if (child_err) {
+		erofs_err("Child process failed: %s", erofs_strerror(child_err));
+		err = child_err;
+		goto kill_child;
+	}
+	/* The child has marked the sparse file by now, so it can be mounted */
+	err = mount(ctx.sparse_path, mountpoint, fstype, flags, options);
+	if (err < 0)
+		err = -errno;
+	if (err)
+		goto kill_child;
+	/* The state file is named after the worker pid */
+	if (asprintf(&state_path, "%s/%d.state",
+		     EROFSMOUNT_FANOTIFY_STATE_DIR, pid) < 0) {
+		err = -ENOMEM;
+		goto out_umount;
+	}
+
+	err = erofsmount_write_fanotify_state(state_path, pid, state_mountpoint,
+					      ctx.sparse_path);
+	if (err)
+		goto out_umount;
+	erofs_dbg("Mounted %s at %s successfully", ctx.sparse_path, mountpoint);
+	goto out;	/* success: the worker is left running */
+
+out_umount:
+	(void)umount(mountpoint);
+kill_child:
+	if (pid > 0) {
+		(void)kill(pid, SIGTERM);
+		(void)waitpid(pid, NULL, 0);	/* reap the terminated worker */
+	}
+out:
+	free(state_path);
+	free(state_mountpoint);
+	erofsmount_fanotify_ctx_cleanup(&ctx);
+	free(blob_digest);
+	return err;
+}
+#endif
+
 int erofsmount_umount(char *target)
 {
 	char *device = NULL, *mountpoint = NULL;
@@ -1437,6 +2082,15 @@ int erofsmount_umount(char *target)
 			goto err_out;
 		}
 	}
+#ifdef EROFS_FANOTIFY_ENABLED
+	if (!isblk) {
+		err = erofsmount_cleanup_fanotify_worker(target, device);
+		if (err) {
+			close(fd);
+			goto err_out;
+		}
+	}
+#endif
 	err = fstat(fd, &st);
 	if (err < 0)
 		err = -errno;
@@ -1533,6 +2187,21 @@ int main(int argc, char *argv[])
 		goto exit;
 	}
 
+#ifdef EROFS_FANOTIFY_ENABLED
+	if (mountcfg.backend == EROFSFANOTIFY) {
+		if (mountsrc.type != EROFSMOUNT_SOURCE_OCI) {
+			erofs_err("Fanotify backend only supports OCI sources");
+			err = -EINVAL;
+			goto exit;
+		}
+		mountsrc.ocicfg.image_ref = mountcfg.device;
+		err = erofsmount_fanotify(&mountsrc, mountcfg.target,
+					  mountcfg.fstype, mountcfg.flags,
+					  mountcfg.options);
+		goto exit;
+	}
+#endif
+
 	if (mountcfg.force_loopdev)
 		goto loopmount;
 
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] erofs-utils: mount: add fanotify pre-content OCI backend
  2026-03-30 12:44 ` [PATCH 2/2] erofs-utils: mount: add fanotify pre-content OCI backend Yifan Zhao
@ 2026-03-31  1:53   ` Gao Xiang
  2026-03-31 13:14   ` [PATCH v2 " Yifan Zhao
  1 sibling, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2026-03-31  1:53 UTC (permalink / raw)
  To: Yifan Zhao, linux-erofs; +Cc: jingrui, zhukeqian1, hudsonzhu

Hi Yifan,

On 2026/3/30 20:44, Yifan Zhao wrote:
> From: Yifan Zhao <yifan.yfzhao@foxmail.com>

The author in the From: line above seems incorrect here.

> 
> Add a fanotify-backed mount mode for OCI sources that uses
> FAN_PRE_ACCESS permission events to populate a local sparse file
> on demand before the kernel consumes the requested data.
> 
> The new erofs.fanotify subtype resolves a single OCI blob,
> creates a sparse cache file, and runs a fanotify event loop
> that fetches missing ranges before allowing access to proceed.
> 
> A pid file recording the canonical mountpoint and sparse-file
> source is written for unmount to track the corresponding worker.
> 
> Signed-off-by: Yifan Zhao <zhaoyifan28@huawei.com>
> ---
>   configure.ac            |  28 ++
>   lib/Makefile.am         |   7 +
>   lib/backends/fanotify.c | 110 +++++++
>   lib/liberofs_fanotify.h |  49 +++
>   lib/liberofs_oci.h      |   3 +
>   lib/remotes/oci.c       |  10 +-
>   mount/main.c            | 671 +++++++++++++++++++++++++++++++++++++++-
>   7 files changed, 872 insertions(+), 6 deletions(-)
>   create mode 100644 lib/backends/fanotify.c
>   create mode 100644 lib/liberofs_fanotify.h
> 
> diff --git a/configure.ac b/configure.ac
> index 8a8e9b3..45b8190 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -194,6 +194,10 @@ AC_ARG_ENABLE(oci,
>                      [enable OCI registry based input support @<:@default=no@:>@]),
>       [enable_oci="$enableval"],[enable_oci="no"])
>   
> +AC_ARG_ENABLE(fanotify,
> +   [AS_HELP_STRING([--enable-fanotify], [enable fanotify pre-content backend @<:@default=no@:>@])],
> +   [enable_fanotify="$enableval"], [enable_fanotify="no"])
> +
>   AC_ARG_ENABLE(fuse,
>      [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
>      [enable_fuse="$enableval"], [enable_fuse="no"])
> @@ -651,6 +655,24 @@ AS_IF([test "x$enable_oci" = "xyes"], [
>     ])
>   ], [have_oci="no"])
>   
> +have_fanotify="no"
> +AS_IF([test "x$enable_fanotify" = "xyes"], [
> +  AS_IF([test "x$build_linux" != "xyes"], [
> +    AC_MSG_ERROR([fanotify backend requires Linux])
> +  ])
> +  AS_IF([test "x$have_oci" != "xyes"], [
> +    AC_MSG_ERROR([fanotify backend requires --enable-oci])
> +  ])
> +  AC_CHECK_HEADERS([sys/fanotify.h], [
> +    have_fanotify="yes"
> +    AC_CHECK_TYPES([struct fanotify_event_info_range], [], [], [[
> +#include <sys/fanotify.h>
> +    ]])
> +  ], [
> +    AC_MSG_ERROR([fanotify backend disabled: missing sys/fanotify.h])
> +  ])
> +])
> +
>   # Configure openssl
>   have_openssl="no"
>   AS_IF([test "x$with_openssl" != "xno"], [
> @@ -766,6 +788,7 @@ AM_CONDITIONAL([ENABLE_LIBXML2], [test "x${have_libxml2}" = "xyes"])
>   AM_CONDITIONAL([ENABLE_S3], [test "x${have_s3}" = "xyes"])
>   AM_CONDITIONAL([ENABLE_STATIC_FUSE], [test "x${enable_static_fuse}" = "xyes"])
>   AM_CONDITIONAL([ENABLE_OCI], [test "x${have_oci}" = "xyes"])
> +AM_CONDITIONAL([ENABLE_FANOTIFY], [test "x${have_fanotify}" = "xyes"])
>   
>   if test "x$have_uuid" = "xyes"; then
>     AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
> @@ -842,6 +865,11 @@ if test "x$have_oci" = "xyes"; then
>     AC_DEFINE([OCIEROFS_ENABLED], 1, [Define to 1 if OCI registry is enabled])
>   fi
>   
> +if test "x$have_fanotify" = "xyes"; then
> +  AC_DEFINE([EROFS_FANOTIFY_ENABLED], 1,
> +	    [Define to 1 if fanotify backend is enabled])
> +fi
> +
>   # Dump maximum block size
>   AS_IF([test "x$erofs_cv_max_block_size" = "x"],
>         [$erofs_cv_max_block_size = 4096], [])
> diff --git a/lib/Makefile.am b/lib/Makefile.am
> index 77f6fd8..5f8812f 100644
> --- a/lib/Makefile.am
> +++ b/lib/Makefile.am
> @@ -36,6 +36,10 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
>         $(top_srcdir)/lib/liberofs_s3.h
>   
>   noinst_HEADERS += compressor.h
> +if ENABLE_FANOTIFY
> +noinst_HEADERS += $(top_srcdir)/lib/liberofs_fanotify.h
> +endif
> +
>   liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
>   		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
>   		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
> @@ -88,6 +92,9 @@ if OS_LINUX
>   liberofs_la_CFLAGS += ${libnl3_CFLAGS}
>   liberofs_la_LDFLAGS += ${libnl3_LIBS}
>   liberofs_la_SOURCES += backends/nbd.c
> +if ENABLE_FANOTIFY
> +liberofs_la_SOURCES += backends/fanotify.c
> +endif
>   endif
>   liberofs_la_SOURCES += remotes/oci.c remotes/docker_config.c
>   liberofs_la_CFLAGS += ${json_c_CFLAGS}
> diff --git a/lib/backends/fanotify.c b/lib/backends/fanotify.c
> new file mode 100644
> index 0000000..66a97a1
> --- /dev/null
> +++ b/lib/backends/fanotify.c
> @@ -0,0 +1,110 @@
> +// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
> +#define _GNU_SOURCE
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <string.h>
> +#include "erofs/print.h"
> +#include "liberofs_fanotify.h"
> +
> +int erofs_fanotify_init_precontent(void)
> +{
> +	int fan_fd;
> +
> +	fan_fd = fanotify_init(FAN_CLASS_PRE_CONTENT | FAN_CLOEXEC | FAN_NONBLOCK,
> +			       O_RDONLY | O_LARGEFILE);
> +	if (fan_fd < 0) {
> +		erofs_err("fanotify_init failed: %s", strerror(errno));
> +		return -errno;
> +	}
> +
> +	return fan_fd;
> +}
> +
> +int erofs_fanotify_mark_file(int fan_fd, const char *path)
> +{
> +	int err;
> +
> +	err = fanotify_mark(fan_fd, FAN_MARK_ADD, FAN_PRE_ACCESS, AT_FDCWD, path);
> +	if (err < 0) {
> +		erofs_err("fanotify_mark failed for %s: %s", path, strerror(errno));
> +		return -errno;
> +	}
> +
> +	erofs_dbg("Marked %s for FAN_PRE_ACCESS monitoring", path);
> +	return 0;
> +}
> +
> +int erofs_fanotify_parse_range_event(const struct fanotify_event_metadata *meta,
> +				     struct erofs_fanotify_range *range)
> +{
> +	const struct fanotify_event_info_header *info_hdr;
> +	const struct fanotify_event_info_range *range_info;
> +	const char *ptr, *end;
> +
> +	if (meta->metadata_len > meta->event_len) {
> +		erofs_err("Invalid fanotify metadata length");
> +		return -EIO;
> +	}
> +
> +	if (meta->vers != FANOTIFY_METADATA_VERSION) {
> +		erofs_err("Unsupported fanotify metadata version %d", meta->vers);
> +		return -EINVAL;
> +	}
> +
> +	/* Initialize range to full file (will be overridden if range info present) */
> +	range->offset = 0;
> +	range->count = 0;
> +
> +	/* Parse additional info records for range information */
> +	ptr = (const char *)meta + meta->metadata_len;
> +	end = (const char *)meta + meta->event_len;
> +
> +	while (ptr < end) {
> +		size_t info_len;
> +
> +		if (end - ptr < sizeof(*info_hdr)) {
> +			erofs_err("Incomplete fanotify event info header");
> +			return -EIO;
> +		}
> +		info_hdr = (const struct fanotify_event_info_header *)ptr;
> +		info_len = info_hdr->len;
> +		if (info_len < sizeof(*info_hdr) || ptr + info_len > end) {
> +			erofs_err("Invalid fanotify event info length");
> +			return -EIO;
> +		}
> +
> +		if (info_hdr->info_type == FAN_EVENT_INFO_TYPE_RANGE) {
> +			if (info_len < sizeof(*range_info)) {
> +				erofs_err("Incomplete fanotify range info");
> +				return -EIO;
> +			}
> +			range_info = (const struct fanotify_event_info_range *)ptr;
> +			range->offset = range_info->offset;
> +			range->count = range_info->count;
> +			break;
> +		}
> +
> +		ptr += info_hdr->len;
> +	}
> +
> +	return 0;
> +}
> +
> +int erofs_fanotify_respond(int fan_fd, int event_fd, bool allow)
> +{
> +	struct fanotify_response response = {
> +		.fd = event_fd,
> +		.response = allow ? FAN_ALLOW : FAN_DENY,
> +	};
> +	ssize_t ret;
> +
> +	ret = write(fan_fd, &response, sizeof(response));
> +	if (ret != sizeof(response)) {
> +		erofs_err("Failed to respond to fanotify event: %s",
> +			  ret < 0 ? strerror(errno) : "short write");
> +		return ret < 0 ? -errno : -EIO;
> +	}
> +
> +	return 0;
> +}
> diff --git a/lib/liberofs_fanotify.h b/lib/liberofs_fanotify.h
> new file mode 100644
> index 0000000..a22b7ee
> --- /dev/null
> +++ b/lib/liberofs_fanotify.h
> @@ -0,0 +1,49 @@
> +/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
> +#ifndef __EROFS_LIB_LIBEROFS_FANOTIFY_H
> +#define __EROFS_LIB_LIBEROFS_FANOTIFY_H
> +
> +#include "erofs/defs.h"
> +#include <sys/fanotify.h>
> +
> +/* FAN_PRE_ACCESS may not be defined in older headers */
> +#ifndef FAN_PRE_ACCESS
> +#define FAN_PRE_ACCESS 0x00100000
> +#endif

How about calling it EROFS_FAN_PRE_ACCESS instead, like:

#ifndef FAN_PRE_ACCESS
#define EROFS_FAN_PRE_ACCESS	0x00100000
#else
#define EROFS_FAN_PRE_ACCESS	FAN_PRE_ACCESS
#endif

> +
> +#ifndef FAN_CLASS_PRE_CONTENT
> +#define FAN_CLASS_PRE_CONTENT 0x00000008
> +#endif

Same here.

> +
> +#ifndef FAN_EVENT_INFO_TYPE_RANGE
> +#define FAN_EVENT_INFO_TYPE_RANGE 6
> +#endif

Same here.

> +
> +/* Define struct fanotify_event_info_range if not in system headers */
> +#ifndef HAVE_STRUCT_FANOTIFY_EVENT_INFO_RANGE
> +struct fanotify_event_info_range {
> +	struct fanotify_event_info_header hdr;
> +	__u32 pad;
> +	__u64 offset;
> +	__u64 count;
> +};
> +#endif

Same here.

#ifndef HAVE_STRUCT_FANOTIFY_EVENT_INFO_RANGE
typedef struct erofs_fanotify_event_info_range {
	struct fanotify_event_info_header hdr;
	...
} erofs_fanotify_event_info_range_t;
#else
typedef struct fanotify_event_info_range erofs_fanotify_event_info_range_t;
#endif

and use `erofs_fanotify_event_info_range_t` instead.

> +
> +struct erofs_fanotify_range {
> +	u64 offset;
> +	u64 count;
> +};
> +
> +/* Initialize fanotify with FAN_CLASS_PRE_CONTENT */
> +int erofs_fanotify_init_precontent(void);
> +
> +/* Mark file for FAN_PRE_ACCESS monitoring */
> +int erofs_fanotify_mark_file(int fan_fd, const char *path);
> +
> +/* Parse a single fanotify event and extract range information */
> +int erofs_fanotify_parse_range_event(const struct fanotify_event_metadata *meta,
> +				     struct erofs_fanotify_range *range);
> +
> +/* Respond to fanotify permission event */
> +int erofs_fanotify_respond(int fan_fd, int event_fd, bool allow);
> +
> +#endif
> diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
> index 2243c82..3b3d66d 100644
> --- a/lib/liberofs_oci.h
> +++ b/lib/liberofs_oci.h
> @@ -76,6 +76,9 @@ struct ocierofs_iostream {
>    */
>   int ocierofs_build_trees(struct erofs_importer *importer,
>   			 const struct ocierofs_config *cfg);
> +int ocierofs_ctx_init(struct ocierofs_ctx *ctx,
> +		      const struct ocierofs_config *cfg);
> +void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx);
>   int ocierofs_io_open(struct erofs_vfile *vf, const struct ocierofs_config *cfg);
>   
>   char *ocierofs_encode_userpass(const char *username, const char *password);
> diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
> index 47e8b27..f96be13 100644
> --- a/lib/remotes/oci.c
> +++ b/lib/remotes/oci.c
> @@ -1144,7 +1144,7 @@ const char *ocierofs_get_platform_spec(void)
>   }
>   
>   /**
> - * ocierofs_init - Initialize OCI context
> + * ocierofs_ctx_init - Initialize OCI context
>    * @ctx: OCI context structure to initialize
>    * @config: OCI configuration
>    *
> @@ -1154,7 +1154,7 @@ const char *ocierofs_get_platform_spec(void)
>    *
>    * Return: 0 on success, negative errno on failure
>    */
> -static int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config)
> +int ocierofs_ctx_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config)
>   {
>   	int ret;
>   
> @@ -1288,7 +1288,7 @@ out:
>    * Clean up CURL handle, free all allocated string parameters, and
>    * reset the OCI context structure to a clean state.
>    */
> -static void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
> +void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
>   {
>   	if (!ctx)
>   		return;
> @@ -1316,7 +1316,7 @@ int ocierofs_build_trees(struct erofs_importer *importer,
>   	int ret, i, end, fd;
>   	u64 tar_offset = 0;
>   
> -	ret = ocierofs_init(&ctx, config);
> +	ret = ocierofs_ctx_init(&ctx, config);
>   	if (ret) {
>   		ocierofs_ctx_cleanup(&ctx);
>   		return ret;
> @@ -1529,7 +1529,7 @@ int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cf
>   	if (!ctx)
>   		return -ENOMEM;
>   
> -	err = ocierofs_init(ctx, cfg);
> +	err = ocierofs_ctx_init(ctx, cfg);
>   	if (err)
>   		goto out;
>   
> diff --git a/mount/main.c b/mount/main.c
> index 350738d..e961937 100644
> --- a/mount/main.c
> +++ b/mount/main.c
> @@ -1,5 +1,6 @@
>   // SPDX-License-Identifier: GPL-2.0+
>   #define _GNU_SOURCE
> +#include <dirent.h>
>   #include <fcntl.h>
>   #include <getopt.h>
>   #include <stdio.h>
> @@ -11,6 +12,7 @@
>   #include <sys/wait.h>
>   #include <pthread.h>
>   #include <unistd.h>
> +#include <poll.h>
>   #include "erofs/config.h"
>   #include "erofs/print.h"
>   #include "erofs/err.h"
> @@ -18,6 +20,9 @@
>   #include "../lib/liberofs_nbd.h"
>   #include "../lib/liberofs_oci.h"
>   #include "../lib/liberofs_gzran.h"
> +#ifdef EROFS_FANOTIFY_ENABLED
> +#include "../lib/liberofs_fanotify.h"
> +#endif
>   
>   #ifdef HAVE_LINUX_LOOP_H
>   #include <linux/loop.h>
> @@ -40,12 +45,22 @@ struct loop_info {
>   
>   /* Device boundary probe */
>   #define EROFSMOUNT_NBD_DISK_SIZE	(INT64_MAX >> 9)
> +#define EROFSMOUNT_CACHE_DIR	"/var/cache/erofs"

`/var/cache/erofsmount` ?

> +#define EROFSMOUNT_RUNTIME_DIR	"/run/erofs"

`/run/erofsmount` ?

> +#define EROFSMOUNT_FANOTIFY_STATE_DIR	EROFSMOUNT_RUNTIME_DIR "/fanotify"
> +
> +#ifdef EROFS_FANOTIFY_ENABLED
> +#define EROFSMOUNT_FANOTIFY_HELP	", fanotify"
> +#else
> +#define EROFSMOUNT_FANOTIFY_HELP	""
> +#endif
>   
>   enum erofs_backend_drv {
>   	EROFSAUTO,
>   	EROFSLOCAL,
>   	EROFSFUSE,
>   	EROFSNBD,
> +	EROFSFANOTIFY,
>   };
>   
>   enum erofsmount_mode {
> @@ -95,7 +110,7 @@ static void usage(int argc, char **argv)
>   		" -d <0-9>              set output verbosity; 0=quiet, 9=verbose (default=%i)\n"
>   		" -o options            comma-separated list of mount options\n"
>   		" -t type[.subtype]     filesystem type (and optional subtype)\n"
> -		"                       subtypes: fuse, local, nbd\n"
> +		"                       subtypes: fuse, local, nbd" EROFSMOUNT_FANOTIFY_HELP "\n"
>   		" -u                    unmount the filesystem\n"
>   		"    --disconnect       abort an existing NBD device forcibly\n"
>   		"    --reattach         reattach to an existing NBD device\n"
> @@ -324,6 +339,13 @@ static int erofsmount_parse_options(int argc, char **argv)
>   					mountcfg.backend = EROFSLOCAL;
>   				} else if (!strcmp(dot + 1, "nbd")) {
>   					mountcfg.backend = EROFSNBD;
> +				} else if (!strcmp(dot + 1, "fanotify")) {
> +#ifdef EROFS_FANOTIFY_ENABLED
> +					mountcfg.backend = EROFSFANOTIFY;
> +#else
> +					erofs_err("fanotify backend is not enabled at build time");
> +					return -EINVAL;
> +#endif
>   				} else {
>   					erofs_err("invalid filesystem subtype `%s`", dot + 1);
>   					return -EINVAL;
> @@ -1342,6 +1364,629 @@ out_err:
>   	return -errno;
>   }
>   
> +#ifdef EROFS_FANOTIFY_ENABLED
> +struct erofsmount_fanotify_state {
> +	pid_t pid;
> +	char *mountpoint;
> +	char *source;
> +};
> +
> +static void erofsmount_free_fanotify_state(struct erofsmount_fanotify_state *state)
> +{
> +	free(state->mountpoint);
> +	free(state->source);
> +	state->mountpoint = NULL;
> +	state->source = NULL;
> +}
> +
> +static int erofsmount_write_fanotify_state(const char *state_path, pid_t pid,
> +					   const char *mountpoint,
> +					   const char *source)
> +{
> +	struct erofsmount_fanotify_state state;
> +	char *tmp_path = NULL;
> +	FILE *f = NULL;
> +	int fd = -1, err;
> +
> +	if (mkdir(EROFSMOUNT_RUNTIME_DIR, 0700) < 0 && errno != EEXIST)
> +		return -errno;
> +	if (mkdir(EROFSMOUNT_FANOTIFY_STATE_DIR, 0700) < 0 &&
> +	    errno != EEXIST)
> +		return -errno;
> +
> +	state.pid = pid;
> +	state.mountpoint = (char *)mountpoint;
> +	state.source = (char *)source;
> +
> +	if (asprintf(&tmp_path, "%s.tmpXXXXXX", state_path) < 0)
> +		return -ENOMEM;
> +
> +	fd = mkstemp(tmp_path);
> +	if (fd < 0) {
> +		err = -errno;
> +		goto out;
> +	}
> +
> +	f = fdopen(fd, "w");
> +	if (!f) {
> +		err = -errno;
> +		goto out;
> +	}
> +	fd = -1;
> +
> +	if (fprintf(f, "%d\n%s\n%s\n", state.pid, state.mountpoint,
> +		    state.source) < 0 || fflush(f) == EOF) {
> +		err = errno ? -errno : -EIO;
> +		goto out;
> +	}
> +
> +	if (fsync(fileno(f)) < 0) {
> +		err = -errno;
> +		goto out;
> +	}
> +
> +	if (fclose(f) < 0) {
> +		err = -errno;
> +		f = NULL;
> +		goto out;
> +	}
> +	f = NULL;
> +
> +	if (rename(tmp_path, state_path) < 0) {
> +		err = -errno;
> +		goto out;
> +	}
> +
> +	err = 0;
> +out:
> +	if (f)
> +		fclose(f);
> +	else if (fd >= 0)
> +		close(fd);
> +	if (err && tmp_path)
> +		unlink(tmp_path);
> +	free(tmp_path);
> +	return err;
> +}
> +
> +static int erofsmount_read_fanotify_state(const char *state_path,
> +					  struct erofsmount_fanotify_state *state)
> +{
> +	FILE *f;
> +	size_t n = 0;
> +	int err = 0;
> +
> +	memset(state, 0, sizeof(*state));
> +
> +	f = fopen(state_path, "r");
> +	if (!f)
> +		return -errno;
> +
> +	if (fscanf(f, "%d", &state->pid) != 1)
> +		err = -EINVAL;
> +	else if (fgetc(f) != '\n')
> +		err = -EINVAL;
> +	else if (getline(&state->mountpoint, &n, f) < 0)
> +		err = feof(f) ? -EINVAL : -errno;
> +	else if (getline(&state->source, &n, f) < 0)
> +		err = feof(f) ? -EINVAL : -errno;
> +	fclose(f);
> +	if (err) {
> +		erofsmount_free_fanotify_state(state);
> +		return err;
> +	}
> +
> +	state->mountpoint[strcspn(state->mountpoint, "\n")] = '\0';
> +	state->source[strcspn(state->source, "\n")] = '\0';
> +	return err;
> +}
> +
> +static int erofsmount_cleanup_fanotify_worker(const char *mountpoint,
> +					      const char *source)
> +{
> +	DIR *dir;
> +	struct dirent *de;
> +	int err = 0;
> +
> +	dir = opendir(EROFSMOUNT_FANOTIFY_STATE_DIR);
> +	if (!dir) {
> +		if (errno == ENOENT)
> +			return 0;
> +		return -errno;
> +	}
> +
> +	while ((de = readdir(dir)) != NULL) {
> +		struct erofsmount_fanotify_state state;
> +		char *state_path;
> +
> +		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
> +			continue;
> +		if (!strstr(de->d_name, ".state"))
> +			continue;
> +		if (asprintf(&state_path, "%s/%s", EROFSMOUNT_FANOTIFY_STATE_DIR,
> +			     de->d_name) < 0) {
> +			err = -ENOMEM;
> +			goto out;
> +		}
> +
> +		err = erofsmount_read_fanotify_state(state_path, &state);
> +		if (err == -ENOENT) {
> +			free(state_path);
> +			err = 0;
> +			continue;
> +		}
> +		if (err) {
> +			free(state_path);
> +			goto out;
> +		}
> +		if (strcmp(state.mountpoint, mountpoint) ||
> +		    strcmp(state.source, source)) {
> +			erofsmount_free_fanotify_state(&state);
> +			free(state_path);
> +			continue;
> +		}
> +		if (kill(state.pid, SIGTERM) < 0 && errno != ESRCH)
> +			err = -errno;
> +		else if (unlink(state_path) < 0 && errno != ENOENT)
> +			err = -errno;
> +		erofsmount_free_fanotify_state(&state);
> +		free(state_path);
> +		goto out;
> +	}
> +out:
> +	closedir(dir);
> +	if (!err)
> +		return 0;
> +	return err;
> +}
> +
> +struct erofsmount_fanotify_ctx {
> +	struct erofs_vfile vd;		/* OCI virtual device */
> +	int sparse_fd;			/* sparse file descriptor */
> +	int fan_fd;			/* fanotify fd */
> +	char *sparse_path;		/* path to sparse file */
> +	u64 image_size;			/* blob size */
> +};
> +
> +static int erofsmount_create_sparse_file(struct erofsmount_fanotify_ctx *ctx,
> +					 u64 size, const char *blob_digest)
> +{
> +	char filepath[PATH_MAX];
> +	const char *hex_digest;
> +	int fd, err;
> +
> +	/* Extract hex part from "sha256:xxxx..." */
> +	if (!blob_digest || strncmp(blob_digest, "sha256:", 7) != 0)
> +		return -EINVAL;
> +	hex_digest = blob_digest + 7;
> +
> +	/* Construct file path using blob SHA256 */
> +	snprintf(filepath, sizeof(filepath), EROFSMOUNT_CACHE_DIR "/%s",
> +		 hex_digest);
> +
> +	/* Try to open existing file or create new one */
> +	fd = open(filepath, O_RDWR | O_CREAT, 0600);
> +	if (fd < 0 && errno == ENOENT) {
> +		err = mkdir(EROFSMOUNT_CACHE_DIR, 0700);
> +		if (err)
> +			return -errno;
> +		fd = open(filepath, O_RDWR | O_CREAT, 0600);
> +	}
> +	if (fd < 0)
> +		return -errno;
> +
> +	ctx->sparse_path = strdup(filepath);
> +	if (!ctx->sparse_path) {
> +		err = -ENOMEM;
> +		goto err_path;
> +	}
> +
> +	/* Set file size (creates sparse file) */
> +	if (ftruncate(fd, size) < 0) {
> +		err = -errno;
> +		goto err_ftruncate;
> +	}
> +
> +	ctx->sparse_fd = fd;
> +	ctx->image_size = size;
> +
> +	erofs_dbg("Created local sparse file %s (size: %llu bytes)",
> +		  ctx->sparse_path, (unsigned long long)size);
> +	return 0;
> +
> +err_ftruncate:
> +	free(ctx->sparse_path);
> +	ctx->sparse_path = NULL;
> +err_path:
> +	close(fd);
> +	unlink(filepath);
> +	return err;
> +}
> +
> +static bool erofsmount_range_in_sparse(int fd, u64 offset, size_t length)
> +{
> +	off_t data_start, hole_start;
> +
> +	/* Check if data exists at offset */
> +	data_start = lseek(fd, offset, SEEK_DATA);
> +	if (data_start < 0) {
> +		if (errno == ENXIO)
> +			return false;  /* No data in file at or after offset */
> +		return false;  /* Error, assume not present */
> +	}
> +
> +	/* If data doesn't start at our offset, range is not fully present */
> +	if ((u64)data_start != offset)
> +		return false;
> +
> +	/* Check if there's a hole before the end of our range */
> +	hole_start = lseek(fd, offset, SEEK_HOLE);
> +	if (hole_start < 0)
> +		return false;
> +
> +	/* If hole starts before our range ends, data is not fully present */
> +	if ((u64)hole_start < offset + length)
> +		return false;
> +
> +	return true;
> +}
> +
> +static int erofsmount_resolve_fanotify_blob(const struct ocierofs_config *oci_cfg,
> +					    char **digest, u64 *image_size)
> +{
> +	struct ocierofs_ctx oci_ctx = {};
> +	int err, i = -1;
> +
> +	err = ocierofs_ctx_init(&oci_ctx, oci_cfg);
> +	if (err)
> +		return err;
> +
> +	if (oci_ctx.blob_digest) {
> +		for (i = 0; i < oci_ctx.layer_count; ++i) {
> +			if (!strcmp(oci_ctx.layers[i]->digest, oci_ctx.blob_digest))
> +				break;
> +		}
> +		if (i >= oci_ctx.layer_count) {
> +			err = -ENOENT;
> +			goto out;
> +		}
> +	} else if (oci_ctx.layer_count == 1) {
> +		i = 0;
> +	} else {
> +		erofs_err("fanotify backend requires exactly one OCI blob; use oci.blob= or oci.layer=");
> +		err = -EINVAL;
> +		goto out;
> +	}
> +
> +	*digest = strdup(oci_ctx.layers[i]->digest);
> +	if (!*digest) {
> +		err = -ENOMEM;
> +		goto out;
> +	}
> +	*image_size = oci_ctx.layers[i]->size;
> +	err = 0;
> +
> +out:
> +	ocierofs_ctx_cleanup(&oci_ctx);
> +	return err;
> +}
> +
> +static int erofs_fanotify_handle_event(struct erofsmount_fanotify_ctx *ctx,
> +				       struct fanotify_event_metadata *meta,
> +				       void **fetch_buf, size_t *fetch_buf_size)
> +{
> +	struct erofs_fanotify_range range;
> +	bool allow_access = true;
> +	u64 offset;
> +	size_t length;
> +	ssize_t read_len, written;
> +	int err, resp_err;
> +
> +	err = erofs_fanotify_parse_range_event(meta, &range);
> +	if (err < 0) {
> +		erofs_err("Failed to parse fanotify event: %s",
> +			  erofs_strerror(err));
> +		allow_access = false;
> +		goto response;
> +	}
> +
> +	if (!(meta->mask & FAN_PRE_ACCESS))
> +		goto response;
> +
> +	offset = range.offset;
> +	length = range.count;
> +
> +	if (length == 0)
> +		length = min_t(u64, 1024 * 1024, ctx->image_size - offset);
> +
> +	if (offset >= ctx->image_size)
> +		goto response;
> +
> +	/* Clamp length to not exceed file size */
> +	if (offset + length > ctx->image_size)
> +		length = ctx->image_size - offset;
> +
> +	/* Check if data already exists locally in sparse file */
> +	if (erofsmount_range_in_sparse(ctx->sparse_fd, offset, length)) {
> +		erofs_dbg("Range [%llu, %llu) already local, skipping fetch",
> +			  (unsigned long long)offset,
> +			  (unsigned long long)(offset + length));
> +		goto response;
> +	}
> +
> +	if (*fetch_buf_size < length) {
> +		void *newbuf = realloc(*fetch_buf, length);
> +
> +		if (!newbuf) {
> +			erofs_err("Failed to allocate %zu bytes", length);
> +			err = -ENOMEM;
> +			allow_access = false;
> +			goto response;
> +		}
> +		*fetch_buf = newbuf;
> +		*fetch_buf_size = length;
> +	}
> +
> +	erofs_dbg("Fetching range [%llu, %llu)",
> +		  (unsigned long long)offset,
> +		  (unsigned long long)(offset + length));
> +
> +	read_len = erofs_io_pread(&ctx->vd, *fetch_buf, length, offset);
> +	if (read_len < 0) {
> +		erofs_err("Failed to fetch range [%llu, %llu): %s",
> +			  (unsigned long long)offset,
> +			  (unsigned long long)(offset + length),
> +			  erofs_strerror(read_len));
> +		err = read_len;
> +		allow_access = false;
> +		goto response;
> +	}
> +
> +	written = pwrite(ctx->sparse_fd, *fetch_buf, read_len, offset);
> +	if (written != read_len) {
> +		erofs_err("Failed to write to sparse file at offset %llu: %s",
> +			  (unsigned long long)offset,
> +			  written < 0 ? strerror(errno) : "short write");
> +		err = written < 0 ? -errno : -EIO;
> +		allow_access = false;
> +		goto response;
> +	}
> +
> +	fsync(ctx->sparse_fd);
> +	err = 0;
> +
> +response:
> +	resp_err = erofs_fanotify_respond(ctx->fan_fd, meta->fd, allow_access);
> +	if (meta->fd >= 0)
> +		close(meta->fd);
> +	return resp_err ? resp_err : err;
> +}
> +
> +static int erofsmount_fanotify_loop(struct erofsmount_fanotify_ctx *ctx)
> +{
> +	char event_buf[4096] __attribute__((aligned(8)));
> +	void *fetch_buf = NULL;
> +	size_t fetch_buf_size = 0;
> +	struct pollfd pfd;
> +	int err = 0;
> +
> +	pfd.fd = ctx->fan_fd;
> +	pfd.events = POLLIN;
> +
> +	while (1) {
> +		struct fanotify_event_metadata *meta;
> +		ssize_t len, remaining;
> +
> +		len = read(ctx->fan_fd, event_buf, sizeof(event_buf));

Can we wrap it up into `lib/backends/fanotify.c` as well?

I think mount.erofs shouldn't need to care about the event loop,
struct fanotify_event_metadata, or
FAN_EVENT_NEXT, for example.

Otherwise it looks good to me.

Thanks,
Gao Xiang


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 2/2] erofs-utils: mount: add fanotify pre-content OCI backend
  2026-03-30 12:44 ` [PATCH 2/2] erofs-utils: mount: add fanotify pre-content OCI backend Yifan Zhao
  2026-03-31  1:53   ` Gao Xiang
@ 2026-03-31 13:14   ` Yifan Zhao
  2026-03-31 14:45     ` Gao Xiang
  1 sibling, 1 reply; 5+ messages in thread
From: Yifan Zhao @ 2026-03-31 13:14 UTC (permalink / raw)
  To: hsiangkao, linux-erofs; +Cc: jingrui, zhukeqian1, zhaoyifan28, hudsonzhu

Add a fanotify-backed mount mode for OCI sources that uses
FAN_PRE_ACCESS permission events to populate a local sparse file
on demand before the kernel consumes the requested data.

The new erofs.fanotify subtype resolves a single OCI blob,
creates a sparse cache file, and runs a fanotify event loop
that fetches missing ranges before allowing access to proceed.

A pid file recording the canonical mountpoint and sparse-file
source is written for unmount to track the corresponding worker.

[ Developed with assistance from GPT-5.4 ]
Signed-off-by: Yifan Zhao <zhaoyifan28@huawei.com>
---
 configure.ac            |  28 +++
 lib/Makefile.am         |   7 +
 lib/backends/fanotify.c | 283 ++++++++++++++++++++++++
 lib/liberofs_fanotify.h |  59 +++++
 lib/liberofs_oci.h      |   3 +
 lib/remotes/oci.c       |  10 +-
 mount/main.c            | 476 +++++++++++++++++++++++++++++++++++++++-
 7 files changed, 860 insertions(+), 6 deletions(-)
 create mode 100644 lib/backends/fanotify.c
 create mode 100644 lib/liberofs_fanotify.h

diff --git a/configure.ac b/configure.ac
index 8a8e9b3..45b8190 100644
--- a/configure.ac
+++ b/configure.ac
@@ -194,6 +194,10 @@ AC_ARG_ENABLE(oci,
                    [enable OCI registry based input support @<:@default=no@:>@]),
     [enable_oci="$enableval"],[enable_oci="no"])
 
+AC_ARG_ENABLE(fanotify,
+   [AS_HELP_STRING([--enable-fanotify], [enable fanotify pre-content backend @<:@default=no@:>@])],
+   [enable_fanotify="$enableval"], [enable_fanotify="no"])
+
 AC_ARG_ENABLE(fuse,
    [AS_HELP_STRING([--enable-fuse], [enable erofsfuse @<:@default=no@:>@])],
    [enable_fuse="$enableval"], [enable_fuse="no"])
@@ -651,6 +655,24 @@ AS_IF([test "x$enable_oci" = "xyes"], [
   ])
 ], [have_oci="no"])
 
+have_fanotify="no"
+AS_IF([test "x$enable_fanotify" = "xyes"], [
+  AS_IF([test "x$build_linux" != "xyes"], [
+    AC_MSG_ERROR([fanotify backend requires Linux])
+  ])
+  AS_IF([test "x$have_oci" != "xyes"], [
+    AC_MSG_ERROR([fanotify backend requires --enable-oci])
+  ])
+  AC_CHECK_HEADERS([sys/fanotify.h], [
+    have_fanotify="yes"
+    AC_CHECK_TYPES([struct fanotify_event_info_range], [], [], [[
+#include <sys/fanotify.h>
+    ]])
+  ], [
+    AC_MSG_ERROR([fanotify backend disabled: missing sys/fanotify.h])
+  ])
+])
+
 # Configure openssl
 have_openssl="no"
 AS_IF([test "x$with_openssl" != "xno"], [
@@ -766,6 +788,7 @@ AM_CONDITIONAL([ENABLE_LIBXML2], [test "x${have_libxml2}" = "xyes"])
 AM_CONDITIONAL([ENABLE_S3], [test "x${have_s3}" = "xyes"])
 AM_CONDITIONAL([ENABLE_STATIC_FUSE], [test "x${enable_static_fuse}" = "xyes"])
 AM_CONDITIONAL([ENABLE_OCI], [test "x${have_oci}" = "xyes"])
+AM_CONDITIONAL([ENABLE_FANOTIFY], [test "x${have_fanotify}" = "xyes"])
 
 if test "x$have_uuid" = "xyes"; then
   AC_DEFINE([HAVE_LIBUUID], 1, [Define to 1 if libuuid is found])
@@ -842,6 +865,11 @@ if test "x$have_oci" = "xyes"; then
   AC_DEFINE([OCIEROFS_ENABLED], 1, [Define to 1 if OCI registry is enabled])
 fi
 
+if test "x$have_fanotify" = "xyes"; then
+  AC_DEFINE([EROFS_FANOTIFY_ENABLED], 1,
+	    [Define to 1 if fanotify backend is enabled])
+fi
+
 # Dump maximum block size
 AS_IF([test "x$erofs_cv_max_block_size" = "x"],
       [$erofs_cv_max_block_size = 4096], [])
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 77f6fd8..5f8812f 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -36,6 +36,10 @@ noinst_HEADERS = $(top_srcdir)/include/erofs_fs.h \
       $(top_srcdir)/lib/liberofs_s3.h
 
 noinst_HEADERS += compressor.h
+if ENABLE_FANOTIFY
+noinst_HEADERS += $(top_srcdir)/lib/liberofs_fanotify.h
+endif
+
 liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \
 		      namei.c data.c compress.c compressor.c zmap.c decompress.c \
 		      compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \
@@ -88,6 +92,9 @@ if OS_LINUX
 liberofs_la_CFLAGS += ${libnl3_CFLAGS}
 liberofs_la_LDFLAGS += ${libnl3_LIBS}
 liberofs_la_SOURCES += backends/nbd.c
+if ENABLE_FANOTIFY
+liberofs_la_SOURCES += backends/fanotify.c
+endif
 endif
 liberofs_la_SOURCES += remotes/oci.c remotes/docker_config.c
 liberofs_la_CFLAGS += ${json_c_CFLAGS}
diff --git a/lib/backends/fanotify.c b/lib/backends/fanotify.c
new file mode 100644
index 0000000..bbe131a
--- /dev/null
+++ b/lib/backends/fanotify.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "erofs/err.h"
+#include "erofs/print.h"
+#include "liberofs_fanotify.h"
+
+/* Create a pre-content fanotify group; returns its fd or a negative errno. */
+int erofs_fanotify_init_precontent(void)
+{
+	int fan_fd, err;
+
+	fan_fd = fanotify_init(EROFS_FAN_CLASS_PRE_CONTENT | FAN_CLOEXEC | FAN_NONBLOCK,
+			       O_RDONLY | O_LARGEFILE);
+	if (fan_fd >= 0)
+		return fan_fd;
+	err = -errno;	/* latch errno before logging can overwrite it */
+	erofs_err("fanotify_init failed: %s", strerror(-err));
+	return err;
+}
+
+/* Add a pre-access mark on @path to @fan_fd; 0 or a negative errno. */
+int erofs_fanotify_mark_file(int fan_fd, const char *path)
+{
+	int err;
+
+	if (fanotify_mark(fan_fd, FAN_MARK_ADD, EROFS_FAN_PRE_ACCESS,
+			  AT_FDCWD, path) < 0) {
+		err = -errno;	/* latch errno before logging can overwrite it */
+		erofs_err("fanotify_mark failed for %s: %s", path, strerror(-err));
+		return err;
+	}
+	erofs_dbg("Marked %s for EROFS_FAN_PRE_ACCESS monitoring", path);
+	return 0;
+}
+
+/*
+ * Pull the byte range out of a pre-access event.  *offset and *count are
+ * zeroed first and only overwritten when a range info record is present.
+ * Returns 0, -EINVAL on an unknown metadata version, -EIO if malformed.
+ */
+static int erofs_fanotify_parse_range_event(const struct fanotify_event_metadata *meta,
+					    u64 *offset, u64 *count)
+{
+	const erofs_fanotify_event_info_range_t *range;
+	const struct fanotify_event_info_header *hdr;
+	const char *cur, *limit;
+	size_t rec_len;
+
+	if (meta->metadata_len > meta->event_len) {
+		erofs_err("Invalid fanotify metadata length");
+		return -EIO;
+	}
+	if (meta->vers != FANOTIFY_METADATA_VERSION) {
+		erofs_err("Unsupported fanotify metadata version %d", meta->vers);
+		return -EINVAL;
+	}
+
+	*offset = 0;
+	*count = 0;
+
+	/* Info records sit between the fixed metadata and the event end. */
+	limit = (const char *)meta + meta->event_len;
+	for (cur = (const char *)meta + meta->metadata_len; cur < limit;
+	     cur += hdr->len) {
+		if (limit - cur < sizeof(*hdr)) {
+			erofs_err("Incomplete fanotify event info header");
+			return -EIO;
+		}
+
+		hdr = (const struct fanotify_event_info_header *)cur;
+		rec_len = hdr->len;
+		if (rec_len < sizeof(*hdr) || cur + rec_len > limit) {
+			erofs_err("Invalid fanotify event info length");
+			return -EIO;
+		}
+		if (hdr->info_type != EROFS_FAN_EVENT_INFO_TYPE_RANGE)
+			continue;	/* some other record type: skip it */
+
+		if (rec_len < sizeof(*range)) {
+			erofs_err("Incomplete fanotify range info");
+			return -EIO;
+		}
+		range = (const erofs_fanotify_event_info_range_t *)cur;
+		*offset = range->offset;
+		*count = range->count;
+		break;
+	}
+
+	return 0;
+}
+
+/* Answer a permission event on @event_fd; 0, -EIO (short write) or -errno. */
+static int erofs_fanotify_respond(int fan_fd, int event_fd, bool allow)
+{
+	struct fanotify_response response = {
+		.fd = event_fd,
+		.response = allow ? FAN_ALLOW : FAN_DENY,
+	};
+	ssize_t ret = write(fan_fd, &response, sizeof(response));
+	int err;
+
+	if (ret == sizeof(response))
+		return 0;
+	err = ret < 0 ? -errno : -EIO;	/* latch errno before logging */
+	erofs_err("Failed to respond to fanotify event: %s",
+		  ret < 0 ? strerror(-err) : "short write");
+	return err;
+}
+
+/*
+ * Report whether [offset, offset + length) is already backed by contiguous
+ * data in @fd.  Probed with SEEK_DATA/SEEK_HOLE; any lseek() failure is
+ * reported as "not present".
+ */
+static bool erofs_fanotify_range_in_sparse(int fd, u64 offset, size_t length)
+{
+	off_t pos;
+
+	/* Data must begin exactly at @offset ... */
+	pos = lseek(fd, offset, SEEK_DATA);
+	if (pos < 0 || (u64)pos != offset)
+		return false;
+
+	/* ... and the next hole must not start before the range ends. */
+	pos = lseek(fd, offset, SEEK_HOLE);
+	return pos >= 0 && (u64)pos >= offset + length;
+}
+
+/*
+ * Fetch [offset, offset + count) into the sparse file unless already present;
+ * count == 0 means "up to 4MiB".  Clamped to the image size.  0 or -errno.
+ */
+static int erofs_fanotify_handle_range(struct erofs_fanotify_ctx *ctx,
+				       u64 offset, u64 count)
+{
+	ssize_t read_len, written;
+	size_t length;
+	int err;
+
+	if (offset >= ctx->image_size)
+		return 0;
+	if (!count)
+		count = 4 * 1024 * 1024;
+	/* Clamp against the remainder so that offset + count cannot wrap. */
+	length = min_t(u64, count, ctx->image_size - offset);
+
+	if (erofs_fanotify_range_in_sparse(ctx->sparse_fd, offset, length)) {
+		erofs_dbg("Range [%llu, %llu) already local, skipping fetch",
+			  (unsigned long long)offset,
+			  (unsigned long long)(offset + length));
+		return 0;
+	}
+
+	if (ctx->fetch_buf_size < length) {
+		void *newbuf = realloc(ctx->fetch_buf, length);
+
+		if (!newbuf) {
+			erofs_err("Failed to allocate %zu bytes", length);
+			return -ENOMEM;
+		}
+		ctx->fetch_buf = newbuf;
+		ctx->fetch_buf_size = length;
+	}
+	erofs_dbg("Fetching range [%llu, %llu)", (unsigned long long)offset,
+		  (unsigned long long)(offset + length));
+	read_len = erofs_io_pread(&ctx->vd, ctx->fetch_buf, length, offset);
+	if (read_len < 0) {
+		erofs_err("Failed to fetch range [%llu, %llu): %s",
+			  (unsigned long long)offset,
+			  (unsigned long long)(offset + length),
+			  erofs_strerror(read_len));
+		return read_len;
+	}
+	written = pwrite(ctx->sparse_fd, ctx->fetch_buf, read_len, offset);
+	if (written != read_len) {
+		err = written < 0 ? -errno : -EIO;	/* latch before logging */
+		erofs_err("Failed to write to sparse file at offset %llu: %s",
+			  (unsigned long long)offset,
+			  written < 0 ? strerror(-err) : "short write");
+		return err;
+	}
+	fsync(ctx->sparse_fd);
+	return 0;
+}
+
+/*
+ * Serve one event: populate the requested range for pre-access events, then
+ * allow or deny the access.  Events without an fd (queue overflow reports
+ * FAN_NOFD) have nothing to answer and are skipped.  Returns 0 or -errno.
+ */
+static int erofs_fanotify_handle_event(struct erofs_fanotify_ctx *ctx,
+				       struct fanotify_event_metadata *meta)
+{
+	u64 offset, count;
+	int err = 0, resp_err;
+
+	erofs_dbg("Handling fanotify event: mask=0x%llx fd=%d pid=%d",
+		  (unsigned long long)meta->mask, meta->fd, meta->pid);
+
+	if (meta->fd < 0)
+		return 0;
+
+	if (meta->mask & EROFS_FAN_PRE_ACCESS) {
+		err = erofs_fanotify_parse_range_event(meta, &offset, &count);
+		if (err >= 0)
+			err = erofs_fanotify_handle_range(ctx, offset, count);
+	}
+
+	/* Always answer, otherwise the accessing task stays blocked. */
+	resp_err = erofs_fanotify_respond(ctx->fan_fd, meta->fd, err >= 0);
+	close(meta->fd);
+	return resp_err ? resp_err : err;
+}
+
+/*
+ * Read and serve fanotify events on ctx->fan_fd until a fatal error occurs.
+ * EAGAIN from read() is waited out with poll(); EINTR is retried.
+ * Returns 0 if read() reports EPIPE, -EINVAL for a NULL @ctx, or the
+ * negative errno of the first failure.
+ */
+int erofs_fanotify_loop(struct erofs_fanotify_ctx *ctx)
+{
+	char event_buf[4096] __attribute__((aligned(8)));
+	struct pollfd pfd = { .events = POLLIN };
+	int err = 0;
+
+	if (!ctx)
+		return -EINVAL;
+	pfd.fd = ctx->fan_fd;	/* only touch ctx after the NULL check */
+
+	while (1) {
+		struct fanotify_event_metadata *meta;
+		ssize_t len, remaining;
+
+		len = read(ctx->fan_fd, event_buf, sizeof(event_buf));
+		if (!len) {
+			erofs_err("Unexpected EOF on fanotify fd");
+			err = -EIO;
+			break;
+		}
+		if (len < 0) {
+			if (errno == EINTR)
+				continue;
+			if (errno == EAGAIN) {
+				/* nothing queued: sleep until events arrive */
+				if (poll(&pfd, 1, -1) >= 0 || errno == EINTR)
+					continue;
+				err = -errno;
+				break;
+			}
+			if (errno == EPIPE)
+				break;	/* treated as a clean stop, err == 0 */
+			err = -errno;
+			erofs_err("Failed to read fanotify events: %s",
+				  strerror(-err));
+			break;
+		}
+
+		remaining = len;
+		for (meta = (struct fanotify_event_metadata *)event_buf;
+		     FAN_EVENT_OK(meta, remaining);
+		     meta = FAN_EVENT_NEXT(meta, remaining)) {
+			err = erofs_fanotify_handle_event(ctx, meta);
+			if (err < 0)
+				break;
+		}
+		if (err)
+			break;
+		if (remaining) {
+			erofs_err("Invalid or incomplete fanotify event buffer");
+			err = -EIO;
+			break;
+		}
+	}
+
+	return err;
+}
diff --git a/lib/liberofs_fanotify.h b/lib/liberofs_fanotify.h
new file mode 100644
index 0000000..965090f
--- /dev/null
+++ b/lib/liberofs_fanotify.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0 */
+#ifndef __EROFS_LIB_LIBEROFS_FANOTIFY_H
+#define __EROFS_LIB_LIBEROFS_FANOTIFY_H
+
+#include "erofs/defs.h"
+#include "erofs/io.h"
+#include <sys/fanotify.h>
+
+/* FAN_PRE_ACCESS may not be defined in older headers */
+#ifndef FAN_PRE_ACCESS
+#define EROFS_FAN_PRE_ACCESS	0x00100000
+#else
+#define EROFS_FAN_PRE_ACCESS	FAN_PRE_ACCESS
+#endif
+
+#ifndef FAN_CLASS_PRE_CONTENT
+#define EROFS_FAN_CLASS_PRE_CONTENT	0x00000008
+#else
+#define EROFS_FAN_CLASS_PRE_CONTENT	FAN_CLASS_PRE_CONTENT
+#endif
+
+#ifndef FAN_EVENT_INFO_TYPE_RANGE
+#define EROFS_FAN_EVENT_INFO_TYPE_RANGE	6
+#else
+#define EROFS_FAN_EVENT_INFO_TYPE_RANGE	FAN_EVENT_INFO_TYPE_RANGE
+#endif
+
+/* Provide a local alias for fanotify_event_info_range compatibility. */
+#ifndef HAVE_STRUCT_FANOTIFY_EVENT_INFO_RANGE
+typedef struct erofs_fanotify_event_info_range {
+	struct fanotify_event_info_header hdr;
+	__u32 pad;
+	__u64 offset;
+	__u64 count;
+} erofs_fanotify_event_info_range_t;
+#else
+typedef struct fanotify_event_info_range erofs_fanotify_event_info_range_t;
+#endif
+
+struct erofs_fanotify_ctx {
+	struct erofs_vfile vd;	/* source that erofs_io_pread() fetches from */
+	int sparse_fd;	/* local sparse file receiving the fetched data */
+	int fan_fd;	/* fanotify group descriptor read by the event loop */
+	char *sparse_path;	/* path of the sparse file; heap-allocated */
+	void *fetch_buf;	/* staging buffer, grown with realloc() on demand */
+	size_t fetch_buf_size;	/* current capacity of fetch_buf in bytes */
+	u64 image_size;	/* image size in bytes; fetch ranges are clamped to it */
+};
+
+/* Initialize fanotify with EROFS_FAN_CLASS_PRE_CONTENT */
+int erofs_fanotify_init_precontent(void);
+
+/* Mark file for EROFS_FAN_PRE_ACCESS monitoring */
+int erofs_fanotify_mark_file(int fan_fd, const char *path);
+
+/* Run the fanotify event loop for a sparse-file backed OCI context. */
+int erofs_fanotify_loop(struct erofs_fanotify_ctx *ctx);
+
+#endif
diff --git a/lib/liberofs_oci.h b/lib/liberofs_oci.h
index 2243c82..3b3d66d 100644
--- a/lib/liberofs_oci.h
+++ b/lib/liberofs_oci.h
@@ -76,6 +76,9 @@ struct ocierofs_iostream {
  */
 int ocierofs_build_trees(struct erofs_importer *importer,
 			 const struct ocierofs_config *cfg);
+int ocierofs_ctx_init(struct ocierofs_ctx *ctx,
+		      const struct ocierofs_config *cfg);
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx);
 int ocierofs_io_open(struct erofs_vfile *vf, const struct ocierofs_config *cfg);
 
 char *ocierofs_encode_userpass(const char *username, const char *password);
diff --git a/lib/remotes/oci.c b/lib/remotes/oci.c
index 47e8b27..f96be13 100644
--- a/lib/remotes/oci.c
+++ b/lib/remotes/oci.c
@@ -1144,7 +1144,7 @@ const char *ocierofs_get_platform_spec(void)
 }
 
 /**
- * ocierofs_init - Initialize OCI context
+ * ocierofs_ctx_init - Initialize OCI context
  * @ctx: OCI context structure to initialize
  * @config: OCI configuration
  *
@@ -1154,7 +1154,7 @@ const char *ocierofs_get_platform_spec(void)
  *
  * Return: 0 on success, negative errno on failure
  */
-static int ocierofs_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config)
+int ocierofs_ctx_init(struct ocierofs_ctx *ctx, const struct ocierofs_config *config)
 {
 	int ret;
 
@@ -1288,7 +1288,7 @@ out:
  * Clean up CURL handle, free all allocated string parameters, and
  * reset the OCI context structure to a clean state.
  */
-static void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
+void ocierofs_ctx_cleanup(struct ocierofs_ctx *ctx)
 {
 	if (!ctx)
 		return;
@@ -1316,7 +1316,7 @@ int ocierofs_build_trees(struct erofs_importer *importer,
 	int ret, i, end, fd;
 	u64 tar_offset = 0;
 
-	ret = ocierofs_init(&ctx, config);
+	ret = ocierofs_ctx_init(&ctx, config);
 	if (ret) {
 		ocierofs_ctx_cleanup(&ctx);
 		return ret;
@@ -1529,7 +1529,7 @@ int ocierofs_io_open(struct erofs_vfile *vfile, const struct ocierofs_config *cf
 	if (!ctx)
 		return -ENOMEM;
 
-	err = ocierofs_init(ctx, cfg);
+	err = ocierofs_ctx_init(ctx, cfg);
 	if (err)
 		goto out;
 
diff --git a/mount/main.c b/mount/main.c
index 350738d..488ce02 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0+
 #define _GNU_SOURCE
+#include <dirent.h>
 #include <fcntl.h>
 #include <getopt.h>
 #include <stdio.h>
@@ -18,6 +19,9 @@
 #include "../lib/liberofs_nbd.h"
 #include "../lib/liberofs_oci.h"
 #include "../lib/liberofs_gzran.h"
+#ifdef EROFS_FANOTIFY_ENABLED
+#include "../lib/liberofs_fanotify.h"
+#endif
 
 #ifdef HAVE_LINUX_LOOP_H
 #include <linux/loop.h>
@@ -40,12 +44,22 @@ struct loop_info {
 
 /* Device boundary probe */
 #define EROFSMOUNT_NBD_DISK_SIZE	(INT64_MAX >> 9)
+#define EROFSMOUNT_CACHE_DIR	"/var/cache/erofsmount"
+#define EROFSMOUNT_RUNTIME_DIR	"/run/erofsmount"
+#define EROFSMOUNT_FANOTIFY_STATE_DIR	EROFSMOUNT_RUNTIME_DIR "/fanotify"
+
+#ifdef EROFS_FANOTIFY_ENABLED
+#define EROFSMOUNT_FANOTIFY_HELP	", fanotify"
+#else
+#define EROFSMOUNT_FANOTIFY_HELP	""
+#endif
 
 enum erofs_backend_drv {
 	EROFSAUTO,
 	EROFSLOCAL,
 	EROFSFUSE,
 	EROFSNBD,
+	EROFSFANOTIFY,
 };
 
 enum erofsmount_mode {
@@ -95,7 +109,7 @@ static void usage(int argc, char **argv)
 		" -d <0-9>              set output verbosity; 0=quiet, 9=verbose (default=%i)\n"
 		" -o options            comma-separated list of mount options\n"
 		" -t type[.subtype]     filesystem type (and optional subtype)\n"
-		"                       subtypes: fuse, local, nbd\n"
+		"                       subtypes: fuse, local, nbd" EROFSMOUNT_FANOTIFY_HELP "\n"
 		" -u                    unmount the filesystem\n"
 		"    --disconnect       abort an existing NBD device forcibly\n"
 		"    --reattach         reattach to an existing NBD device\n"
@@ -324,6 +338,13 @@ static int erofsmount_parse_options(int argc, char **argv)
 					mountcfg.backend = EROFSLOCAL;
 				} else if (!strcmp(dot + 1, "nbd")) {
 					mountcfg.backend = EROFSNBD;
+				} else if (!strcmp(dot + 1, "fanotify")) {
+#ifdef EROFS_FANOTIFY_ENABLED
+					mountcfg.backend = EROFSFANOTIFY;
+#else
+					erofs_err("fanotify backend is not enabled at build time");
+					return -EINVAL;
+#endif
 				} else {
 					erofs_err("invalid filesystem subtype `%s`", dot + 1);
 					return -EINVAL;
@@ -1342,6 +1363,435 @@ out_err:
 	return -errno;
 }
 
+#ifdef EROFS_FANOTIFY_ENABLED
+struct erofsmount_fanotify_state {
+	pid_t pid;	/* pid of the forked fanotify worker */
+	char *mountpoint;	/* mountpoint as recorded at mount time (realpath) */
+	char *source;	/* sparse file path that was passed to mount() */
+};
+
+/* Release the strings owned by @state and reset both pointers to NULL. */
+static void erofsmount_free_fanotify_state(struct erofsmount_fanotify_state *state)
+{
+	free(state->source);
+	free(state->mountpoint);
+	state->source = state->mountpoint = NULL;
+}
+
+/*
+ * Atomically publish a worker state file: "<pid>\n<mountpoint>\n<source>\n"
+ * goes to a mkstemp() sibling of @state_path, is synced, then renamed over
+ * it.  The runtime directories are created on demand.
+ * Returns 0 or a negative errno.
+ */
+static int erofsmount_write_fanotify_state(const char *state_path, pid_t pid,
+					   const char *mountpoint,
+					   const char *source)
+{
+	char *tmp_path = NULL;
+	FILE *f = NULL;
+	int fd, err = 0;
+
+	/* Both directories may already exist from an earlier mount. */
+	if (mkdir(EROFSMOUNT_RUNTIME_DIR, 0700) < 0 && errno != EEXIST)
+		return -errno;
+	if (mkdir(EROFSMOUNT_FANOTIFY_STATE_DIR, 0700) < 0 &&
+	    errno != EEXIST)
+		return -errno;
+
+	if (asprintf(&tmp_path, "%s.tmpXXXXXX", state_path) < 0)
+		return -ENOMEM;
+
+	/* Write to a temporary sibling first, then move it into place. */
+	fd = mkstemp(tmp_path);
+	if (fd < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	f = fdopen(fd, "w");
+	if (!f) {
+		err = -errno;
+		close(fd);	/* no stream took ownership of the descriptor */
+		goto out;
+	}
+
+	if (fprintf(f, "%d\n%s\n%s\n", pid, mountpoint, source) < 0 ||
+	    fflush(f) == EOF) {
+		err = errno ? -errno : -EIO;
+		goto out;
+	}
+
+	/* Sync the contents before the rename makes them visible. */
+	if (fsync(fileno(f)) < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	if (fclose(f) < 0)
+		err = -errno;
+	f = NULL;	/* the stream is gone whether or not fclose() succeeded */
+	if (err)
+		goto out;
+
+	/* rename() swaps the finished file in under its final name. */
+	if (rename(tmp_path, state_path) < 0)
+		err = -errno;
+
+out:
+	if (f)
+		fclose(f);
+	/* On any failure, do not leave the temporary file behind. */
+	if (err)
+		unlink(tmp_path);
+	free(tmp_path);
+	return err;
+}
+
+/*
+ * Parse "<pid>\n<mountpoint>\n<source>\n" from @state_path into @state; on
+ * success (0) the caller owns the strings.  Returns -errno on I/O failure and
+ * -EINVAL if malformed, including a pid <= 0 that must never reach kill(2).
+ */
+static int erofsmount_read_fanotify_state(const char *state_path,
+					  struct erofsmount_fanotify_state *state)
+{
+	size_t mnt_cap = 0, src_cap = 0;
+	int err = 0;
+	FILE *f;
+
+	memset(state, 0, sizeof(*state));
+	f = fopen(state_path, "r");
+	if (!f)
+		return -errno;
+	if (fscanf(f, "%d", &state->pid) != 1 || state->pid <= 0 ||
+	    fgetc(f) != '\n')
+		err = -EINVAL;
+	else if (getline(&state->mountpoint, &mnt_cap, f) < 0 ||
+		 getline(&state->source, &src_cap, f) < 0)
+		err = feof(f) ? -EINVAL : -errno;
+	fclose(f);
+	if (err) {
+		erofsmount_free_fanotify_state(state);
+		return err;
+	}
+	state->mountpoint[strcspn(state->mountpoint, "\n")] = '\0';
+	state->source[strcspn(state->source, "\n")] = '\0';
+	return 0;
+}
+
+/*
+ * Stop the fanotify worker recorded for (@mountpoint, @source), if any.
+ * Scans the "*.state" files of the state directory; the first match gets
+ * SIGTERM and its state file is removed.  Returns 0 (also when nothing
+ * matched or the directory is missing) or a negative errno.
+ */
+static int erofsmount_cleanup_fanotify_worker(const char *mountpoint,
+					      const char *source)
+{
+	static const char suffix[] = ".state";
+	struct dirent *de;
+	int err = 0;
+	DIR *dir;
+
+	dir = opendir(EROFSMOUNT_FANOTIFY_STATE_DIR);
+	if (!dir)
+		return errno == ENOENT ? 0 : -errno;
+
+	while (!err && (de = readdir(dir)) != NULL) {
+		struct erofsmount_fanotify_state state;
+		size_t len = strlen(de->d_name);
+		char *state_path;
+		bool match;
+
+		/* Name must END in ".state"; "*.state.tmpXXXXXX" is in flight. */
+		if (len < sizeof(suffix) ||
+		    strcmp(de->d_name + len - (sizeof(suffix) - 1), suffix))
+			continue;
+		if (asprintf(&state_path, "%s/%s", EROFSMOUNT_FANOTIFY_STATE_DIR,
+			     de->d_name) < 0) {
+			err = -ENOMEM;
+			break;
+		}
+
+		err = erofsmount_read_fanotify_state(state_path, &state);
+		if (err) {
+			free(state_path);
+			if (err == -ENOENT)	/* file vanished meanwhile */
+				err = 0;
+			continue;
+		}
+		match = !strcmp(state.mountpoint, mountpoint) &&
+			!strcmp(state.source, source);
+		if (match) {
+			if (kill(state.pid, SIGTERM) < 0 && errno != ESRCH)
+				err = -errno;
+			else if (unlink(state_path) < 0 && errno != ENOENT)
+				err = -errno;
+		}
+		erofsmount_free_fanotify_state(&state);
+		free(state_path);
+		if (match)
+			break;
+	}
+
+	closedir(dir);
+	return err;
+}
+
+/*
+ * Open (creating if needed) the cache file named after the blob's sha256
+ * under EROFSMOUNT_CACHE_DIR, size it to @size and record fd, path and size
+ * in @ctx.  Returns 0 or a negative errno; -EINVAL unless @blob_digest is
+ * "sha256:" followed by exactly 64 lowercase hex digits.
+ */
+static int erofsmount_create_sparse_file(struct erofs_fanotify_ctx *ctx,
+					 u64 size, const char *blob_digest)
+{
+	char filepath[PATH_MAX];
+	const char *hex_digest;
+	int fd, err;
+
+	if (!blob_digest || strncmp(blob_digest, "sha256:", 7) != 0)
+		return -EINVAL;
+	hex_digest = blob_digest + 7;
+	/* The digest becomes a file name: refuse '/', ".." and the like. */
+	if (strspn(hex_digest, "0123456789abcdef") != 64 || hex_digest[64])
+		return -EINVAL;
+
+	snprintf(filepath, sizeof(filepath), EROFSMOUNT_CACHE_DIR "/%s",
+		 hex_digest);
+	fd = open(filepath, O_RDWR | O_CREAT, 0600);
+	if (fd < 0 && errno == ENOENT) {
+		/* Cache directory missing; EEXIST means it was just created. */
+		if (mkdir(EROFSMOUNT_CACHE_DIR, 0700) < 0 && errno != EEXIST)
+			return -errno;
+		fd = open(filepath, O_RDWR | O_CREAT, 0600);
+	}
+	if (fd < 0)
+		return -errno;
+
+	ctx->sparse_path = strdup(filepath);
+	if (!ctx->sparse_path) {
+		err = -ENOMEM;
+		goto err_close;
+	}
+	if (ftruncate(fd, size) < 0) {
+		err = -errno;
+		free(ctx->sparse_path);
+		ctx->sparse_path = NULL;
+		goto err_close;
+	}
+	ctx->sparse_fd = fd;
+	ctx->image_size = size;
+	erofs_dbg("Created local sparse file %s (size: %llu bytes)",
+		  ctx->sparse_path, (unsigned long long)size);
+	return 0;
+
+err_close:
+	close(fd);
+	unlink(filepath);
+	return err;
+}
+
+/*
+ * Pick the single OCI layer to mount: the one whose digest equals
+ * oci_ctx.blob_digest when that is set, else the image's only layer.  On
+ * success *digest is a malloc'ed copy (caller frees) and *image_size its size.
+ */
+static int erofsmount_resolve_fanotify_blob(const struct ocierofs_config *oci_cfg,
+					    char **digest, u64 *image_size)
+{
+	struct ocierofs_ctx oci_ctx = {};
+	int err, i = 0;
+
+	/* Clean up even when init fails, as ocierofs_build_trees() does. */
+	err = ocierofs_ctx_init(&oci_ctx, oci_cfg);
+	if (err)
+		goto out;
+
+	if (oci_ctx.blob_digest) {
+		while (i < oci_ctx.layer_count &&
+		       strcmp(oci_ctx.layers[i]->digest, oci_ctx.blob_digest))
+			++i;
+		if (i >= oci_ctx.layer_count) {
+			err = -ENOENT;
+			goto out;
+		}
+	} else if (oci_ctx.layer_count != 1) {
+		erofs_err("fanotify backend requires exactly one OCI blob; use oci.blob= or oci.layer=");
+		err = -EINVAL;
+		goto out;
+	}
+
+	*digest = strdup(oci_ctx.layers[i]->digest);
+	if (!*digest)
+		err = -ENOMEM;
+	else
+		*image_size = oci_ctx.layers[i]->size;
+out:
+	ocierofs_ctx_cleanup(&oci_ctx);
+	return err;
+}
+
+static void erofsmount_fanotify_ctx_cleanup(struct erofs_fanotify_ctx *ctx)
+{
+	if (ctx->fan_fd >= 0)	/* a negative fd means there is nothing to close */
+		close(ctx->fan_fd);
+	if (ctx->sparse_fd >= 0)
+		close(ctx->sparse_fd);
+	if (ctx->vd.ops || ctx->vd.fd >= 0)	/* NOTE(review): presumably "vd was opened" - confirm */
+		erofs_io_close(&ctx->vd);
+	free(ctx->fetch_buf);
+	free(ctx->sparse_path);	/* fields are not reset: call at most once per ctx */
+}
+
+/*
+ * Worker side of the fork: set up the fanotify group and the mark on the
+ * sparse file, report the setup status (an int, 0 or -errno) through
+ * @pipefd, then serve events until the loop ends.  @pipefd is always closed.
+ */
+static int erofsmount_fanotify_child(struct erofs_fanotify_ctx *ctx,
+				     int pipefd)
+{
+	int err;
+
+	ctx->fan_fd = erofs_fanotify_init_precontent();
+	if (ctx->fan_fd < 0)
+		err = ctx->fan_fd;
+	else
+		err = erofs_fanotify_mark_file(ctx->fan_fd, ctx->sparse_path);
+
+	/* Without a full report the parent assumes failure and kills us. */
+	if (write(pipefd, &err, sizeof(err)) != sizeof(err) && !err)
+		err = -EPIPE;
+	close(pipefd);
+	if (err)
+		return err;
+
+	return erofs_fanotify_loop(ctx);
+}
+
+static int erofsmount_fanotify(struct erofsmount_source *source,
+			       const char *mountpoint, const char *fstype,
+			       int flags, const char *options)
+{
+	struct erofs_fanotify_ctx ctx = {
+		.vd = {.fd = -1},	/* -1 fds are skipped by the cleanup helper */
+		.sparse_fd = -1,
+		.fan_fd = -1,
+	};
+	struct ocierofs_config layer_cfg;
+	char *blob_digest = NULL;
+	char *state_mountpoint = NULL;
+	char *state_path = NULL;
+	pid_t pid = -1;
+	int pipefd[2];
+	int err, child_err;
+	u64 image_size;
+
+	if (strcmp(fstype, "erofs")) {
+		fprintf(stderr, "unsupported filesystem type `%s`\n", fstype);
+		return -ENODEV;
+	}
+	flags |= MS_RDONLY;	/* this backend always mounts read-only */
+
+	if (source->ocicfg.tarindex_path || source->ocicfg.zinfo_path) {
+		erofs_err("fanotify backend does not support tarindex or zinfo");
+		return -EOPNOTSUPP;
+	}
+
+	state_mountpoint = realpath(mountpoint, NULL);	/* recorded in the state file */
+	if (!state_mountpoint) {
+		err = -errno;
+		goto out;
+	}
+
+	err = erofsmount_resolve_fanotify_blob(&source->ocicfg, &blob_digest,
+					       &image_size);
+	if (err)
+		goto out;
+
+	layer_cfg = source->ocicfg;
+	layer_cfg.blob_digest = blob_digest;
+	layer_cfg.layer_index = -1;	/* NOTE(review): presumably "select by digest only" - confirm */
+
+	err = ocierofs_io_open(&ctx.vd, &layer_cfg);
+	if (err)
+		goto out;
+
+	err = erofsmount_create_sparse_file(&ctx, image_size, blob_digest);
+	if (err)
+		goto out;
+
+	/* The child reports its fanotify setup status through this pipe. */
+	if (pipe(pipefd) < 0) {
+		err = -errno;
+		goto out;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		err = -errno;
+		close(pipefd[0]);
+		close(pipefd[1]);
+		goto out;
+	}
+
+	if (pid == 0) {	/* child: becomes the fanotify worker, never returns */
+		close(pipefd[0]);
+		err = erofsmount_fanotify_child(&ctx, pipefd[1]);
+		erofsmount_fanotify_ctx_cleanup(&ctx);
+		exit(err ? EXIT_FAILURE : EXIT_SUCCESS);
+	}
+
+	/* Parent: block until the child reports its setup result (0 or -errno). */
+	close(pipefd[1]);
+	if (read(pipefd[0], &child_err, sizeof(child_err)) != sizeof(child_err))
+		child_err = -EPIPE;	/* no complete report was received */
+	close(pipefd[0]);
+
+	if (child_err) {
+		erofs_err("Child process failed: %s", erofs_strerror(child_err));
+		err = child_err;
+		goto kill_child;
+	}
+	/* Mount only after the worker has reported its mark is in place. */
+	err = mount(ctx.sparse_path, mountpoint, fstype, flags, options);
+	if (err < 0)
+		err = -errno;
+	if (err)
+		goto kill_child;
+
+	if (asprintf(&state_path, "%s/%d.state",
+		     EROFSMOUNT_FANOTIFY_STATE_DIR, pid) < 0) {
+		err = -ENOMEM;
+		goto out_umount;
+	}
+
+	err = erofsmount_write_fanotify_state(state_path, pid, state_mountpoint,
+					      ctx.sparse_path);
+	if (err)
+		goto out_umount;
+	erofs_dbg("Mounted %s at %s successfully", ctx.sparse_path, mountpoint);
+	goto out;	/* success: the worker is left running */
+
+out_umount:
+	(void)umount(mountpoint);
+kill_child:
+	if (pid > 0) {
+		(void)kill(pid, SIGTERM);
+		(void)waitpid(pid, NULL, 0);	/* reap the worker */
+	}
+out:
+	free(state_path);
+	free(state_mountpoint);
+	erofsmount_fanotify_ctx_cleanup(&ctx);	/* releases this process's copies */
+	free(blob_digest);
+	return err;
+}
+#endif
+
 int erofsmount_umount(char *target)
 {
 	char *device = NULL, *mountpoint = NULL;
@@ -1437,6 +1887,15 @@ int erofsmount_umount(char *target)
 			goto err_out;
 		}
 	}
+#ifdef EROFS_FANOTIFY_ENABLED
+	if (!isblk) {
+		err = erofsmount_cleanup_fanotify_worker(target, device);
+		if (err) {
+			close(fd);
+			goto err_out;
+		}
+	}
+#endif
 	err = fstat(fd, &st);
 	if (err < 0)
 		err = -errno;
@@ -1533,6 +1992,21 @@ int main(int argc, char *argv[])
 		goto exit;
 	}
 
+#ifdef EROFS_FANOTIFY_ENABLED
+	if (mountcfg.backend == EROFSFANOTIFY) {
+		if (mountsrc.type != EROFSMOUNT_SOURCE_OCI) {
+			erofs_err("Fanotify backend only supports OCI sources");
+			err = -EINVAL;
+			goto exit;
+		}
+		mountsrc.ocicfg.image_ref = mountcfg.device;
+		err = erofsmount_fanotify(&mountsrc, mountcfg.target,
+					  mountcfg.fstype, mountcfg.flags,
+					  mountcfg.options);
+		goto exit;
+	}
+#endif
+
 	if (mountcfg.force_loopdev)
 		goto loopmount;
 
-- 
2.47.3



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 2/2] erofs-utils: mount: add fanotify pre-content OCI backend
  2026-03-31 13:14   ` [PATCH v2 " Yifan Zhao
@ 2026-03-31 14:45     ` Gao Xiang
  0 siblings, 0 replies; 5+ messages in thread
From: Gao Xiang @ 2026-03-31 14:45 UTC (permalink / raw)
  To: Yifan Zhao, linux-erofs; +Cc: jingrui, zhukeqian1, hudsonzhu

Hi Yifan,

On 2026/3/31 21:14, Yifan Zhao wrote:
> Add a fanotify-backed mount mode for OCI sources that uses
> FAN_PRE_ACCESS permission events to populate a local sparse file
> on demand before the kernel consumes the requested data.
> 
> The new erofs.fanotify subtype resolves a single OCI blob,
> creates a sparse cache file, and runs a fanotify event loop
> that fetches missing ranges before allowing access to proceed.
> 
> A pid file recording the canonical mountpoint and sparse-file
> source is written for unmount to track the corresponding worker.
> 
> [ Developed with assistance from GPT-5.4 ]

I will apply this version, but I have some comments:

The AI assistance should be marked with a trailer, for example:
Assisted-by: AGENT_NAME:GPT-5.4

> Signed-off-by: Yifan Zhao <zhaoyifan28@huawei.com>
> ---
>   configure.ac            |  28 +++
>   lib/Makefile.am         |   7 +
>   lib/backends/fanotify.c | 283 ++++++++++++++++++++++++
>   lib/liberofs_fanotify.h |  59 +++++
>   lib/liberofs_oci.h      |   3 +
>   lib/remotes/oci.c       |  10 +-
>   mount/main.c            | 476 +++++++++++++++++++++++++++++++++++++++-
>   7 files changed, 860 insertions(+), 6 deletions(-)
>   create mode 100644 lib/backends/fanotify.c
>   create mode 100644 lib/liberofs_fanotify.h
> 

...

> +
> +static bool erofs_fanotify_range_in_sparse(int fd, u64 offset, size_t length)
> +{
> +	off_t data_start, hole_start;
> +
> +	data_start = lseek(fd, offset, SEEK_DATA);
> +	if (data_start < 0)
> +		return false;
> +	if ((u64)data_start != offset)
> +		return false;
> +
> +	hole_start = lseek(fd, offset, SEEK_HOLE);
> +	if (hole_start < 0)
> +		return false;
> +	if ((u64)hole_start < offset + length)
> +		return false;

Here I really hope that the following commits can switch to bitmaps
instead of relying on holes.

> +
> +	return true;
> +}

...

> +
> +static int erofsmount_write_fanotify_state(const char *state_path, pid_t pid,
> +					   const char *mountpoint,
> +					   const char *source)
> +{
> +	struct erofsmount_fanotify_state state;
> +	char *tmp_path = NULL;
> +	FILE *f = NULL;
> +	int fd = -1, err;
> +
> +	if (mkdir(EROFSMOUNT_RUNTIME_DIR, 0700) < 0 && errno != EEXIST)
> +		return -errno;
> +	if (mkdir(EROFSMOUNT_FANOTIFY_STATE_DIR, 0700) < 0 &&
> +	    errno != EEXIST)
> +		return -errno;
> +
> +	state.pid = pid;
> +	state.mountpoint = (char *)mountpoint;
> +	state.source = (char *)source;
> +
> +	if (asprintf(&tmp_path, "%s.tmpXXXXXX", state_path) < 0)
> +		return -ENOMEM;
> +
> +	fd = mkstemp(tmp_path);
> +	if (fd < 0) {
> +		err = -errno;
> +		goto out;
> +	}
> +
> +	f = fdopen(fd, "w");
> +	if (!f) {
> +		err = -errno;
> +		goto out;
> +	}
> +	fd = -1;
> +
> +	if (fprintf(f, "%d\n%s\n%s\n", state.pid, state.mountpoint,
> +		    state.source) < 0 || fflush(f) == EOF) {

Here, I do think you could identify the mountpoint by its mnt_id
(e.g. you could use the `mnt_id` as the file name); see STATX_MNT_ID
in statx(2):

https://man7.org/linux/man-pages/man2/statx.2.html

A unique mnt_id seems like overkill, since we will delete such files
when unmounting.

> +		err = errno ? -errno : -EIO;
> +		goto out;

...

> +
> +static int erofsmount_read_fanotify_state(const char *state_path,
> +					  struct erofsmount_fanotify_state *state)
> +{
> +	FILE *f;
> +	size_t n = 0;
> +	int err = 0;
> +
> +	memset(state, 0, sizeof(*state));
> +
> +	f = fopen(state_path, "r");
> +	if (!f)
> +		return -errno;
> +
> +	if (fscanf(f, "%d", &state->pid) != 1)
> +		err = -EINVAL;
> +	else if (fgetc(f) != '\n')
> +		err = -EINVAL;
> +	else if (getline(&state->mountpoint, &n, f) < 0)
> +		err = feof(f) ? -EINVAL : -errno;
> +	else if (getline(&state->source, &n, f) < 0)
> +		err = feof(f) ? -EINVAL : -errno;
> +	fclose(f);
> +	if (err) {
> +		erofsmount_free_fanotify_state(state);
> +		return err;
> +	}
> +
> +	state->mountpoint[strcspn(state->mountpoint, "\n")] = '\0';
> +	state->source[strcspn(state->source, "\n")] = '\0';
> +	return err;
> +}
> +
> +static int erofsmount_cleanup_fanotify_worker(const char *mountpoint,
> +					      const char *source)
> +{
> +	DIR *dir;
> +	struct dirent *de;
> +	int err = 0;
> +
> +	dir = opendir(EROFSMOUNT_FANOTIFY_STATE_DIR);
> +	if (!dir) {
> +		if (errno == ENOENT)
> +			return 0;
> +		return -errno;
> +	}
> +
> +	while ((de = readdir(dir)) != NULL) {
> +		struct erofsmount_fanotify_state state;
> +		char *state_path;
> +
> +		if (strcmp(de->d_name, ".") == 0 || strcmp(de->d_name, "..") == 0)
> +			continue;
> +		if (!strstr(de->d_name, ".state"))
> +			continue;
> +		if (asprintf(&state_path, "%s/%s", EROFSMOUNT_FANOTIFY_STATE_DIR,
> +			     de->d_name) < 0) {
> +			err = -ENOMEM;
> +			goto out;
> +		}
> +
> +		err = erofsmount_read_fanotify_state(state_path, &state);

Same here: if you index by mnt_id, you don't need readdir() anymore.

Thanks,
Gao Xiang
>   



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-03-31 14:45 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-30 12:44 [PATCH 1/2 RESEND] erofs-utils: mount: generalize nbd source types for multi-backend support Yifan Zhao
2026-03-30 12:44 ` [PATCH 2/2] erofs-utils: mount: add fanotify pre-content OCI backend Yifan Zhao
2026-03-31  1:53   ` Gao Xiang
2026-03-31 13:14   ` [PATCH v2 " Yifan Zhao
2026-03-31 14:45     ` Gao Xiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox