* [PATCH 7/7] fuse2fs: fix uninitialized variable warnings
From: Theodore Ts'o @ 2026-05-04 23:33 UTC (permalink / raw)
To: Ext4 Developers List; +Cc: Darrick J. Wong, Theodore Ts'o
In-Reply-To: <20260504233301.2345652-1-tytso@mit.edu>
Clang treats use of uninitialized variables found by static analysis to
be serious enough that it will emit warnings for them by default (even
without -Wall). So let's fix them up.
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
fuse4fs/fuse4fs.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index b883083f8..928473268 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -2216,7 +2216,7 @@ static void op_mknod(fuse_req_t req, fuse_ino_t fino, const char *name,
const struct fuse_ctx *ctxt = fuse_req_ctx(req);
struct fuse4fs *ff = fuse4fs_get(req);
ext2_filsys fs;
- ext2_ino_t parent, child;
+ ext2_ino_t parent, child = 0;
errcode_t err;
int filetype;
gid_t gid;
@@ -2311,7 +2311,7 @@ static void op_mkdir(fuse_req_t req, fuse_ino_t fino, const char *name,
const struct fuse_ctx *ctxt = fuse_req_ctx(req);
struct fuse4fs *ff = fuse4fs_get(req);
ext2_filsys fs;
- ext2_ino_t parent, child;
+ ext2_ino_t parent, child = 0;
errcode_t err;
char *block;
blk64_t blk;
@@ -2990,7 +2990,7 @@ static void op_symlink(fuse_req_t req, const char *target, fuse_ino_t fino,
const struct fuse_ctx *ctxt = fuse_req_ctx(req);
struct fuse4fs *ff = fuse4fs_get(req);
ext2_filsys fs;
- ext2_ino_t parent, child;
+ ext2_ino_t parent, child = 0;
errcode_t err;
gid_t gid;
int ret = 0;
@@ -6042,7 +6042,7 @@ static int fuse4fs_main(struct fuse_args *args, struct fuse4fs *ff)
struct fuse_cmdline_opts opts;
struct fuse_session *se;
struct fuse_loop_config *loop_config = NULL;
- int ret;
+ int ret = 0;
if (fuse_parse_cmdline(args, &opts) != 0) {
ret = 1;
--
2.53.0
^ permalink raw reply related
* [PATCH 5/7] libsupport: remove the LIST_HEAD macro from list.h
From: Theodore Ts'o @ 2026-05-04 23:32 UTC (permalink / raw)
To: Ext4 Developers List; +Cc: Darrick J. Wong, Theodore Ts'o
In-Reply-To: <20260504233301.2345652-1-tytso@mit.edu>
The LIST_HEAD macro is defined by both the Linux kernel's list.h and
<sys/queue.h> (originally from BSD). The problem is that on MacOS,
some system header file pulls in <sys/queue.h> leading to a macro
redefinition conflict. Since we don't use LIST_HEAD in e2fsprogs,
the simplest mitigation is to just comment out the definition of LIST_HEAD.
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Fixes: 37dafcc0894b ("libsupport: port the kernel list.h to libsupport")
---
lib/support/list.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/lib/support/list.h b/lib/support/list.h
index 0e00e446d..0ec8de525 100644
--- a/lib/support/list.h
+++ b/lib/support/list.h
@@ -36,8 +36,17 @@ static inline void list_head_destroy(struct list_head *list)
#define LIST_HEAD_INIT(name) { &(name), &(name) }
+#if 0
+/*
+ * This conflicts with the LIST_HEAD defined in <sys/queue.h>. We
+ * don't include it in e2fsprogs, but some systems' header files might
+ * pull it in, leading to the macro redefinition conflict.
+ * Fortunately, we also don't need LIST_HEAD in e2fsprogs, so just
+ * drop this convenience macro.
+ */
#define LIST_HEAD(name) \
struct list_head name = LIST_HEAD_INIT(name)
+#endif
/**
* INIT_LIST_HEAD - Initialize a list_head structure
--
2.53.0
^ permalink raw reply related
* [PATCH 6/7] libsupport: fix gcc -Wall warnings
From: Theodore Ts'o @ 2026-05-04 23:33 UTC (permalink / raw)
To: Ext4 Developers List; +Cc: Darrick J. Wong, Theodore Ts'o
In-Reply-To: <20260504233301.2345652-1-tytso@mit.edu>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
lib/support/cache.c | 12 +++++++++---
lib/support/cache.h | 2 +-
lib/support/list.h | 8 ++++----
3 files changed, 14 insertions(+), 8 deletions(-)
diff --git a/lib/support/cache.c b/lib/support/cache.c
index aec785347..ece0adece 100644
--- a/lib/support/cache.c
+++ b/lib/support/cache.c
@@ -15,6 +15,12 @@
#include <stdint.h>
#include <errno.h>
+#ifdef __GNUC__
+#define EXT2FS_ATTR(x) __attribute__(x)
+#else
+#define EXT2FS_ATTR(x)
+#endif
+
#include "config.h"
#include "list.h"
#include "cache.h"
@@ -711,7 +717,7 @@ cache_node_put(
void
cache_node_set_priority(
- struct cache * cache,
+ struct cache * cache EXT2FS_ATTR((unused)),
struct cache_node * node,
int priority)
{
@@ -820,7 +826,7 @@ cache_flush(
{
struct cache_hash *hash;
struct cache_node *node;
- int i;
+ unsigned int i;
bool still_dirty = false;
if (!cache->flush)
@@ -848,7 +854,7 @@ cache_report(
const char *name,
struct cache *cache)
{
- int i;
+ unsigned int i;
unsigned long count, index, total;
unsigned long hash_bucket_lengths[HASH_REPORT + 2] = { 0 };
diff --git a/lib/support/cache.h b/lib/support/cache.h
index 71fb9762f..cd0e8c20e 100644
--- a/lib/support/cache.h
+++ b/lib/support/cache.h
@@ -83,7 +83,7 @@ typedef unsigned int (*cache_node_resize_t)(const struct cache *c,
unsigned int curr_size,
int dir);
-static inline unsigned int cache_gradual_resize(const struct cache *cache,
+static inline unsigned int cache_gradual_resize(const struct cache *cache EXT2FS_ATTR((unused)),
unsigned int curr_size,
int dir)
{
diff --git a/lib/support/list.h b/lib/support/list.h
index 0ec8de525..54e8e2360 100644
--- a/lib/support/list.h
+++ b/lib/support/list.h
@@ -148,13 +148,13 @@ static __always_inline bool __list_del_entry_valid(struct list_head *entry)
return ret;
}
#else
-static inline bool __list_add_valid(struct list_head *new,
- struct list_head *prev,
- struct list_head *next)
+static inline bool __list_add_valid(struct list_head *new EXT2FS_ATTR((unused)),
+ struct list_head *prev EXT2FS_ATTR((unused)),
+ struct list_head *next EXT2FS_ATTR((unused)))
{
return true;
}
-static inline bool __list_del_entry_valid(struct list_head *entry)
+static inline bool __list_del_entry_valid(struct list_head *entry EXT2FS_ATTR((unused)))
{
return true;
}
--
2.53.0
^ permalink raw reply related
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Darrick J. Wong @ 2026-05-05 0:08 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-1-tytso@mit.edu>
On Mon, May 04, 2026 at 07:32:54PM -0400, Theodore Ts'o wrote:
> These are the patches that I've applied to the next branch to clean up
> issues found when trying to compile e2fsprogs on MacOS and via github's
> continuous integration testing.
>
> Note: See .github/workflows/ci.yaml; it's a good reason to get a
> github account if you are doing a lot of e2fsprogs development.
> If you fork my e2fsprogs repository and push your changes to your github
> repo, you can get reports like the ones found here:
> https://github.com/tytso/e2fsprogs/actions
>
>
> Anyway, with these patches, e2fsprogs builds on MacOS (both using the
> github CI and locally on my Macbook). Unfortunately fuse2fs has
> regressed in that non-root accesses are failing with "Permission
> denied" on MacOS, and this worked with e2fsprogs v1.47.4. Here's a
> lightly edited transcript from my Macbook Air:
Hm, curious. It's regrettable that I no longer have a Mac, and
therefore can't really do much investigating. If you give
-o default_permissions,allow_other , does that fix the problem?
If that fixes it, then fuse2fs has a bug somewhere in its own
permissions checking.
> % uname -a
> Darwin macsyma.local 25.4.0 Darwin Kernel Version 25.4.0: Thu Mar 19 19:33:09 PDT 2026; root:xnu-12377.101.15~1/RELEASE_ARM64_T8112 arm64 arm Darwin
> % ./mke2fs -Fq -t ext4 -d . /tmp/foo.img 1G
> % ./fuse2fs -o allow_other /tmp/foo.img /Users/tytso/mnt
> FUSE2FS (foo.img): Warning: fuse2fs does not support using the journal.
> There may be file system corruption or data loss if
> the file system is not gracefully unmounted.
> % stat /Users/tytso/mnt/revoke.o
> stat: cannot stat '/Users/tytso/mnt/revoke.o': Permission denied
> % sudo stat /Users/tytso/mnt/revoke.o
> File: /Users/tytso/mnt/revoke.o
> Size: 29728 Blocks: 64 IO Block: 16384 regular file
> Device: 55,270 Inode: 91 Links: 1
> Access: (0644/-rw-r--r--) Uid: (15806/ tytso) Gid: ( 20/ staff)
> Access: 2026-05-05 00:21:50.000000000 +0200
> Modify: 2026-05-05 00:21:50.000000000 +0200
> Change: 2026-05-05 00:21:50.000000000 +0200
> Birth: -
> % stat revoke.o
> File: revoke.o
> Size: 29728 Blocks: 64 IO Block: 4096 regular file
> Device: 1,18 Inode: 851374 Links: 1
> Access: (0644/-rw-r--r--) Uid: (15806/ tytso) Gid: ( 20/ staff)
> Access: 2026-05-05 00:21:50.903084819 +0200
> Modify: 2026-05-05 00:21:50.346338295 +0200
> Change: 2026-05-05 00:21:50.346580544 +0200
> Birth: 2026-05-05 00:21:50.293229053 +0200
> % sum revoke.o
> 45116 30 revoke.o
> % sum /Users/tytso/mnt/revoke.o
> sum: /Users/tytso/mnt/revoke.o: Permission denied
That's really strange. Does MacOS do any strange idmapping stuff?
I'm also curious if there's a particular commit that makes it all work
again? There are a few defaults that changed between fuse2 and fuse3,
and since MacFuse claims to have vendored both, it could very well be
the megacommit dropping the fuse2 api support.
--D
> So fuse2fs is broken on MacOS, but it's not *totally* broken, since you
> can still use it as root. I'll want to fix this before updating the
> master branch, but this is good enough for the next branch.
>
>
>
> Theodore Ts'o (7):
> libsupport: drop xbitops.h and define fls() if necessary
> configure.ac: fix disable fuse2fs/fuse4fs by default path
> libsupport: don't use bzero in cache.c
> fuse[24]fs: suppress clang warnings which were breaking the github CI
> libsupport: remove the LIST_HEAD macro from list.h
> libsupport: fix gcc -Wall warnings
> fuse2fs: fix uninitialized variable warnings
>
> configure | 8 +++
> configure.ac | 3 +
> fuse4fs/Makefile.in | 18 +++---
> fuse4fs/fuse4fs.c | 29 +++++++--
> lib/config.h.in | 3 +
> lib/support/Makefile.in | 2 +-
> lib/support/cache.c | 49 ++++++++++++---
> lib/support/cache.h | 2 +-
> lib/support/list.h | 17 ++++--
> lib/support/xbitops.h | 128 ----------------------------------------
> misc/fuse2fs.c | 21 +++++++
> 11 files changed, 125 insertions(+), 155 deletions(-)
> delete mode 100644 lib/support/xbitops.h
>
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 1/7] libsupport: drop xbitops.h and define fls() if necessary
From: Darrick J. Wong @ 2026-05-05 0:11 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-2-tytso@mit.edu>
On Mon, May 04, 2026 at 07:32:55PM -0400, Theodore Ts'o wrote:
> The new cache.c file doesn't actually need most of the functions in
> xbitops.h. It only needs fls(), and on some systems, like MacOS and
> *BSD systems, fls() is defined already. The other functions in
> xbitops.h are massively non-portable and break on non-Linux systems,
> especially MacOS.
>
> So define fls() if it's needed (on Linux), and drop xbitops.h.
>
> Fixes: 30b3c80ed6bc ("libsupport: add a cache")
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Works for me; in the end I didn't use it in as many places as I thought I
was going to.
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> configure | 6 ++
> configure.ac | 1 +
> fuse4fs/Makefile.in | 18 +++---
> lib/config.h.in | 3 +
> lib/support/Makefile.in | 2 +-
> lib/support/cache.c | 33 ++++++++++-
> lib/support/xbitops.h | 128 ----------------------------------------
> 7 files changed, 51 insertions(+), 140 deletions(-)
> delete mode 100644 lib/support/xbitops.h
>
> diff --git a/configure b/configure
> index a97794121..a1a270e63 100755
> --- a/configure
> +++ b/configure
> @@ -13677,6 +13677,12 @@ if test "x$ac_cv_func_fdatasync" = xyes
> then :
> printf "%s\n" "#define HAVE_FDATASYNC 1" >>confdefs.h
>
> +fi
> +ac_fn_c_check_func "$LINENO" "fls" "ac_cv_func_fls"
> +if test "x$ac_cv_func_fls" = xyes
> +then :
> + printf "%s\n" "#define HAVE_FLS 1" >>confdefs.h
> +
> fi
> ac_fn_c_check_func "$LINENO" "fstat64" "ac_cv_func_fstat64"
> if test "x$ac_cv_func_fstat64" = xyes
> diff --git a/configure.ac b/configure.ac
> index b62553e3d..abce79594 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1233,6 +1233,7 @@ AC_CHECK_FUNCS(m4_flatten([
> fchown
> fcntl
> fdatasync
> + fls
> fstat64
> fsync
> ftruncate64
> diff --git a/fuse4fs/Makefile.in b/fuse4fs/Makefile.in
> index 9f3547c27..cecee2b25 100644
> --- a/fuse4fs/Makefile.in
> +++ b/fuse4fs/Makefile.in
> @@ -134,7 +134,7 @@ distclean: clean
> $(RM) -f .depend Makefile $(srcdir)/TAGS $(srcdir)/Makefile.in.old
>
> # +++ Dependency line eater +++
> -#
> +#
> # Makefile dependencies follow. This must be the last section in
> # the Makefile.in file
> #
> @@ -145,9 +145,10 @@ fuse4fs.o: $(srcdir)/fuse4fs.c $(top_builddir)/lib/config.h \
> $(top_srcdir)/lib/ext2fs/ext2_io.h $(top_builddir)/lib/ext2fs/ext2_err.h \
> $(top_srcdir)/lib/ext2fs/ext2_ext_attr.h $(top_srcdir)/lib/ext2fs/hashmap.h \
> $(top_srcdir)/lib/ext2fs/bitops.h $(top_srcdir)/lib/ext2fs/ext2fsP.h \
> - $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/version.h \
> - $(top_srcdir)/lib/e2p/e2p.h $(top_srcdir)/lib/support/cache.h \
> - $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/support/xbitops.h
> + $(top_srcdir)/lib/ext2fs/ext2fs.h $(top_srcdir)/lib/support/bthread.h \
> + $(top_srcdir)/lib/support/thread.h $(top_srcdir)/lib/support/list.h \
> + $(top_srcdir)/lib/support/cache.h $(top_srcdir)/version.h \
> + $(top_srcdir)/lib/e2p/e2p.h
> journal.o: $(srcdir)/../debugfs/journal.c $(top_builddir)/lib/config.h \
> $(top_builddir)/lib/dirpaths.h $(srcdir)/../debugfs/journal.h \
> $(top_srcdir)/e2fsck/jfs_user.h $(top_srcdir)/e2fsck/e2fsck.h \
> @@ -161,8 +162,7 @@ journal.o: $(srcdir)/../debugfs/journal.c $(top_builddir)/lib/config.h \
> $(top_srcdir)/lib/support/dqblk_v2.h \
> $(top_srcdir)/lib/support/quotaio_tree.h \
> $(top_srcdir)/lib/ext2fs/fast_commit.h $(top_srcdir)/lib/ext2fs/jfs_compat.h \
> - $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/ext2fs/compiler.h \
> - $(top_srcdir)/lib/ext2fs/kernel-jbd.h
> + $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/ext2fs/kernel-jbd.h
> revoke.o: $(srcdir)/../e2fsck/revoke.c $(srcdir)/../e2fsck/jfs_user.h \
> $(top_builddir)/lib/config.h $(top_builddir)/lib/dirpaths.h \
> $(srcdir)/../e2fsck/e2fsck.h $(top_srcdir)/lib/ext2fs/ext2_fs.h \
> @@ -175,8 +175,7 @@ revoke.o: $(srcdir)/../e2fsck/revoke.c $(srcdir)/../e2fsck/jfs_user.h \
> $(top_srcdir)/lib/support/dqblk_v2.h \
> $(top_srcdir)/lib/support/quotaio_tree.h \
> $(top_srcdir)/lib/ext2fs/fast_commit.h $(top_srcdir)/lib/ext2fs/jfs_compat.h \
> - $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/ext2fs/compiler.h \
> - $(top_srcdir)/lib/ext2fs/kernel-jbd.h
> + $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/ext2fs/kernel-jbd.h
> recovery.o: $(srcdir)/../e2fsck/recovery.c $(srcdir)/../e2fsck/jfs_user.h \
> $(top_builddir)/lib/config.h $(top_builddir)/lib/dirpaths.h \
> $(srcdir)/../e2fsck/e2fsck.h $(top_srcdir)/lib/ext2fs/ext2_fs.h \
> @@ -189,5 +188,4 @@ recovery.o: $(srcdir)/../e2fsck/recovery.c $(srcdir)/../e2fsck/jfs_user.h \
> $(top_srcdir)/lib/support/dqblk_v2.h \
> $(top_srcdir)/lib/support/quotaio_tree.h \
> $(top_srcdir)/lib/ext2fs/fast_commit.h $(top_srcdir)/lib/ext2fs/jfs_compat.h \
> - $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/ext2fs/compiler.h \
> - $(top_srcdir)/lib/ext2fs/kernel-jbd.h
> + $(top_srcdir)/lib/support/list.h $(top_srcdir)/lib/ext2fs/kernel-jbd.h
> diff --git a/lib/config.h.in b/lib/config.h.in
> index fd2520396..abba5e2c6 100644
> --- a/lib/config.h.in
> +++ b/lib/config.h.in
> @@ -124,6 +124,9 @@
> /* Define to 1 if you have the 'fdatasync' function. */
> #undef HAVE_FDATASYNC
>
> +/* Define to 1 if you have the 'fls' function. */
> +#undef HAVE_FLS
> +
> /* Define to 1 if fsmap_sizeof() is declared in linux/fsmap.h */
> #undef HAVE_FSMAP_SIZEOF
>
> diff --git a/lib/support/Makefile.in b/lib/support/Makefile.in
> index 6ff1c81d0..d20d6a984 100644
> --- a/lib/support/Makefile.in
> +++ b/lib/support/Makefile.in
> @@ -199,4 +199,4 @@ dict.o: $(srcdir)/dict.c $(top_builddir)/lib/config.h \
> devname.o: $(srcdir)/devname.c $(top_builddir)/lib/config.h \
> $(top_builddir)/lib/dirpaths.h $(srcdir)/devname.h $(srcdir)/nls-enable.h
> cache.o: $(srcdir)/cache.c $(top_builddir)/lib/config.h \
> - $(srcdir)/cache.h $(srcdir)/list.h $(srcdir)/xbitops.h
> + $(top_builddir)/lib/dirpaths.h $(srcdir)/list.h $(srcdir)/cache.h
> diff --git a/lib/support/cache.c b/lib/support/cache.c
> index 3a9e276f1..f9669e3fc 100644
> --- a/lib/support/cache.c
> +++ b/lib/support/cache.c
> @@ -7,6 +7,7 @@
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> +#include <strings.h>
> #include <unistd.h>
> #include <pthread.h>
> #include <stdbool.h>
> @@ -14,9 +15,9 @@
> #include <stdint.h>
> #include <errno.h>
>
> +#include "config.h"
> #include "list.h"
> #include "cache.h"
> -#include "xbitops.h"
>
> #undef CACHE_DEBUG
> /* #define CACHE_DEBUG 1 */
> @@ -32,6 +33,36 @@
> # define ASSERT(x) do { } while (0)
> #endif
>
> +#ifndef HAVE_FLS
> +static inline int fls(int x)
> +{
> + int r = 32;
> +
> + if (!x)
> + return 0;
> + if (!(x & 0xffff0000u)) {
> + x = (x & 0xffffu) << 16;
> + r -= 16;
> + }
> + if (!(x & 0xff000000u)) {
> + x = (x & 0xffffffu) << 8;
> + r -= 8;
> + }
> + if (!(x & 0xf0000000u)) {
> + x = (x & 0xfffffffu) << 4;
> + r -= 4;
> + }
> + if (!(x & 0xc0000000u)) {
> + x = (x & 0x3fffffffu) << 2;
> + r -= 2;
> + }
> + if (!(x & 0x80000000u)) {
> + r -= 1;
> + }
> + return r;
> +}
> +#endif
> +
> static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);
>
> int
> diff --git a/lib/support/xbitops.h b/lib/support/xbitops.h
> deleted file mode 100644
> index 78a8f2a85..000000000
> --- a/lib/support/xbitops.h
> +++ /dev/null
> @@ -1,128 +0,0 @@
> -// SPDX-License-Identifier: GPL-2.0
> -#ifndef __BITOPS_H__
> -#define __BITOPS_H__
> -
> -/*
> - * fls: find last bit set.
> - */
> -
> -static inline int fls(int x)
> -{
> - int r = 32;
> -
> - if (!x)
> - return 0;
> - if (!(x & 0xffff0000u)) {
> - x = (x & 0xffffu) << 16;
> - r -= 16;
> - }
> - if (!(x & 0xff000000u)) {
> - x = (x & 0xffffffu) << 8;
> - r -= 8;
> - }
> - if (!(x & 0xf0000000u)) {
> - x = (x & 0xfffffffu) << 4;
> - r -= 4;
> - }
> - if (!(x & 0xc0000000u)) {
> - x = (x & 0x3fffffffu) << 2;
> - r -= 2;
> - }
> - if (!(x & 0x80000000u)) {
> - r -= 1;
> - }
> - return r;
> -}
> -
> -static inline int fls64(uint64_t x)
> -{
> - uint32_t h = x >> 32;
> - if (h)
> - return fls(h) + 32;
> - return fls(x);
> -}
> -
> -static inline unsigned fls_long(unsigned long l)
> -{
> - if (sizeof(l) == 4)
> - return fls(l);
> - return fls64(l);
> -}
> -
> -/*
> - * ffz: find first zero bit.
> - * Result is undefined if no zero bit exists.
> - */
> -#define ffz(x) ffs(~(x))
> -
> -/*
> - * XFS bit manipulation routines. Repeated here so that some programs
> - * don't have to link in all of libxfs just to have bit manipulation.
> - */
> -
> -/*
> - * masks with n high/low bits set, 64-bit values
> - */
> -static inline uint64_t mask64hi(int n)
> -{
> - return (uint64_t)-1 << (64 - (n));
> -}
> -static inline uint32_t mask32lo(int n)
> -{
> - return ((uint32_t)1 << (n)) - 1;
> -}
> -static inline uint64_t mask64lo(int n)
> -{
> - return ((uint64_t)1 << (n)) - 1;
> -}
> -
> -/* Get high bit set out of 32-bit argument, -1 if none set */
> -static inline int highbit32(uint32_t v)
> -{
> - return fls(v) - 1;
> -}
> -
> -/* Get high bit set out of 64-bit argument, -1 if none set */
> -static inline int highbit64(uint64_t v)
> -{
> - return fls64(v) - 1;
> -}
> -
> -/* Get low bit set out of 32-bit argument, -1 if none set */
> -static inline int lowbit32(uint32_t v)
> -{
> - return ffs(v) - 1;
> -}
> -
> -/* Get low bit set out of 64-bit argument, -1 if none set */
> -static inline int lowbit64(uint64_t v)
> -{
> - uint32_t w = (uint32_t)v;
> - int n = 0;
> -
> - if (w) { /* lower bits */
> - n = ffs(w);
> - } else { /* upper bits */
> - w = (uint32_t)(v >> 32);
> - if (w) {
> - n = ffs(w);
> - if (n)
> - n += 32;
> - }
> - }
> - return n - 1;
> -}
> -
> -/**
> - * __rounddown_pow_of_two() - round down to nearest power of two
> - * @n: value to round down
> - */
> -static inline __attribute__((const))
> -unsigned long __rounddown_pow_of_two(unsigned long n)
> -{
> - return 1UL << (fls_long(n) - 1);
> -}
> -
> -#define rounddown_pow_of_two(n) __rounddown_pow_of_two(n)
> -
> -#endif
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 2/7] configure.ac: fix disable fuse2fs/fuse4fs by default path
From: Darrick J. Wong @ 2026-05-05 0:13 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-3-tytso@mit.edu>
On Mon, May 04, 2026 at 07:32:56PM -0400, Theodore Ts'o wrote:
> If the fuse.h header file is not present, fix the "Disabling
> fuse[24]fs by default" codepath to actually disable trying to build
> fuse2fs or fuse4fs.
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> Fixes: 239f4b7ac05b ("fuse2fs: separate libfuse3 and fuse2fs detection in configure")
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Oops. I hadn't tried building without a fuse.h in a while. :/
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> configure | 2 ++
> configure.ac | 2 ++
> 2 files changed, 4 insertions(+)
>
> diff --git a/configure b/configure
> index a1a270e63..d941ff1f1 100755
> --- a/configure
> +++ b/configure
> @@ -14737,6 +14737,7 @@ else case e in #(
> { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Enabling fuse2fs by default" >&5
> printf "%s\n" "Enabling fuse2fs by default" >&6; }
> else
> + FUSE2FS_CMT="#"
> { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Disabling fuse2fs by default" >&5
> printf "%s\n" "Disabling fuse2fs by default" >&6; }
> fi
> @@ -14804,6 +14805,7 @@ else case e in #(
> { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Enabling fuse4fs by default" >&5
> printf "%s\n" "Enabling fuse4fs by default" >&6; }
> else
> + FUSE4FS_CMT="#"
> { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: Disabling fuse4fs by default" >&5
> printf "%s\n" "Disabling fuse4fs by default" >&6; }
> fi
> diff --git a/configure.ac b/configure.ac
> index abce79594..d8f40f5df 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1459,6 +1459,7 @@ AS_HELP_STRING([--disable-fuse2fs],[do not build fuse2fs]),
> then
> AC_MSG_RESULT([Enabling fuse2fs by default])
> else
> + FUSE2FS_CMT="#"
> AC_MSG_RESULT([Disabling fuse2fs by default])
> fi
> ]
> @@ -1496,6 +1497,7 @@ AS_HELP_STRING([--disable-fuse4fs],[do not build fuse4fs]),
> then
> AC_MSG_RESULT([Enabling fuse4fs by default])
> else
> + FUSE4FS_CMT="#"
> AC_MSG_RESULT([Disabling fuse4fs by default])
> fi
> ]
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 3/7] libsupport: don't use bzero in cache.c
From: Darrick J. Wong @ 2026-05-05 0:15 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-4-tytso@mit.edu>
On Mon, May 04, 2026 at 07:32:57PM -0400, Theodore Ts'o wrote:
> The bzero() function is deprecated and not available in some
> platforms. It's not needed in any case because we can just use an
> automatic initializer.
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> Fixes: 30b3c80ed6bc ("libsupport: add a cache")
Yeesh. What a sordid legacy this function has (see manpage). :(
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> lib/support/cache.c | 4 +---
> 1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/lib/support/cache.c b/lib/support/cache.c
> index f9669e3fc..aec785347 100644
> --- a/lib/support/cache.c
> +++ b/lib/support/cache.c
> @@ -850,7 +850,7 @@ cache_report(
> {
> int i;
> unsigned long count, index, total;
> - unsigned long hash_bucket_lengths[HASH_REPORT + 2];
> + unsigned long hash_bucket_lengths[HASH_REPORT + 2] = { 0 };
>
> if ((cache->c_hits + cache->c_misses) == 0)
> return;
> @@ -886,8 +886,6 @@ cache_report(
> cache->c_mrus[i].cm_count * 100 / cache->c_count);
>
> /* report hash bucket lengths */
> - bzero(hash_bucket_lengths, sizeof(hash_bucket_lengths));
> -
> for (i = 0; i < cache->c_hashsize; i++) {
> count = cache->c_hash[i].ch_count;
> if (count > HASH_REPORT)
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 4/7] fuse[24]fs: suppress clang warnings which were breaking the github CI
From: Darrick J. Wong @ 2026-05-05 0:20 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-5-tytso@mit.edu>
On Mon, May 04, 2026 at 07:32:58PM -0400, Theodore Ts'o wrote:
> fuse[24]fs.c defines some functions which might or might not be used
> depending on the version of the fuse library installed on the system.
> Unfortunately while gcc won't complain if there are static inline
> functions which are not used, clang will only suppress such complaints
> if the static inline functions are in a header file.
<groan>
> Since the github CI runs a test build with -Wall -Werror this was
> breaking the CI build.
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
> fuse4fs/fuse4fs.c | 21 +++++++++++++++++++++
> misc/fuse2fs.c | 21 +++++++++++++++++++++
> 2 files changed, 42 insertions(+)
>
> diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
> index 13e1aae4b..b883083f8 100644
> --- a/fuse4fs/fuse4fs.c
> +++ b/fuse4fs/fuse4fs.c
> @@ -502,6 +502,27 @@ static inline off_t FUSE4FS_FSB_TO_B(const struct fuse4fs *ff, blk64_t bno)
> return bno << ff->blocklog;
> }
>
> +#ifdef __clang__
> +#define SUPPRESS_UNUSED_FUNCTION(func) (void)func
> +/*
> + * These functions might or might not be used depending on the version
> + * of the fuse library installed on the system. Unfortunately while
> + * gcc won't complain if there are static inline functions which are
> + * not used, clang will only suppress such complaints if the static
> + * inline functions are in a header file.
> + */
> +void fuse2fs_shut_up_stupid_clang_warnings(void)
> +{
> + SUPPRESS_UNUSED_FUNCTION(fuse2fs_shut_up_stupid_clang_warnings);
Why is this stupid recursive shi ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
even necessary?
> + SUPPRESS_UNUSED_FUNCTION(round_up);
> + SUPPRESS_UNUSED_FUNCTION(round_down);
> + SUPPRESS_UNUSED_FUNCTION(FUSE4FS_B_TO_FSBT);
> + SUPPRESS_UNUSED_FUNCTION(FUSE4FS_OFF_IN_FSB);
> + SUPPRESS_UNUSED_FUNCTION(FUSE4FS_B_TO_FSB);
> + SUPPRESS_UNUSED_FUNCTION(FUSE4FS_FSB_TO_B);
Hrmm some of these aren't strictly necessary, but ... oh. The first
three are actively used in the punch hole helpers, but that only applies
on a platform that *supports* hole punching. So my guess is that these
clang warnings appear on macos, where there isn't holepunching?
--D
> +}
> +#endif
> +
> static double gettime_monotonic(void)
> {
> #ifdef CLOCK_MONOTONIC
> diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
> index 0f4781bc4..c46cfc236 100644
> --- a/misc/fuse2fs.c
> +++ b/misc/fuse2fs.c
> @@ -339,6 +339,27 @@ static inline off_t FUSE2FS_FSB_TO_B(const struct fuse2fs *ff, blk64_t bno)
> return bno << ff->blocklog;
> }
>
> +#ifdef __clang__
> +#define SUPPRESS_UNUSED_FUNCTION(func) (void)func
> +/*
> + * These functions might or might not be used depending on the version
> + * of the fuse library installed on the system. Unfortunately while
> + * gcc won't complain if there are static inline functions which are
> + * not used, clang will only suppress such complaints if the static
> + * inline functions are in a header file.
> + */
> +void fuse2fs_shut_up_stupid_clang_warnings(void)
> +{
> + SUPPRESS_UNUSED_FUNCTION(fuse2fs_shut_up_stupid_clang_warnings);
> + SUPPRESS_UNUSED_FUNCTION(round_up);
> + SUPPRESS_UNUSED_FUNCTION(round_down);
> + SUPPRESS_UNUSED_FUNCTION(FUSE2FS_B_TO_FSBT);
> + SUPPRESS_UNUSED_FUNCTION(FUSE2FS_OFF_IN_FSB);
> + SUPPRESS_UNUSED_FUNCTION(FUSE2FS_B_TO_FSB);
> + SUPPRESS_UNUSED_FUNCTION(FUSE2FS_FSB_TO_B);
> +}
> +#endif
> +
> static double gettime_monotonic(void)
> {
> #ifdef CLOCK_MONOTONIC
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 5/7] libsupport: remove the LIST_HEAD macro from list.h
From: Darrick J. Wong @ 2026-05-05 0:20 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-6-tytso@mit.edu>
On Mon, May 04, 2026 at 07:32:59PM -0400, Theodore Ts'o wrote:
> The LIST_HEAD macro is defined by both the Linux kernel's list.h and
> <sys/queue.h> (originally from BSD). The problem is that on MacOS,
> some system header file pulls in <sys/queue.h> leading to a macro
> redefinition conflict. Since we don't use LIST_HEAD in e2fsprogs,
> the simplest mitigation is to just comment out the definition of LIST_HEAD.
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> Fixes: 37dafcc0894b ("libsupport: port the kernel list.h to libsupport")
Sounds fine to me.
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> lib/support/list.h | 9 +++++++++
> 1 file changed, 9 insertions(+)
>
> diff --git a/lib/support/list.h b/lib/support/list.h
> index 0e00e446d..0ec8de525 100644
> --- a/lib/support/list.h
> +++ b/lib/support/list.h
> @@ -36,8 +36,17 @@ static inline void list_head_destroy(struct list_head *list)
>
> #define LIST_HEAD_INIT(name) { &(name), &(name) }
>
> +#if 0
> +/*
> + * This conflicts with the LIST_HEAD defined in <sys/queue.h>. We
> + * don't include it in e2fsprogs, but some systems' header files might
> + * pull it in, leading to the macro redefinition conflict.
> + * Fortunately, we also don't need LIST_HEAD in e2fsprogs, so just
> + * drop this convenience macro.
> + */
> #define LIST_HEAD(name) \
> struct list_head name = LIST_HEAD_INIT(name)
> +#endif
>
> /**
> * INIT_LIST_HEAD - Initialize a list_head structure
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 6/7] libsupport: fix gcc -Wall warnings
From: Darrick J. Wong @ 2026-05-05 0:20 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-7-tytso@mit.edu>
On Mon, May 04, 2026 at 07:33:00PM -0400, Theodore Ts'o wrote:
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Looks good to me,
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> lib/support/cache.c | 12 +++++++++---
> lib/support/cache.h | 2 +-
> lib/support/list.h | 8 ++++----
> 3 files changed, 14 insertions(+), 8 deletions(-)
>
> diff --git a/lib/support/cache.c b/lib/support/cache.c
> index aec785347..ece0adece 100644
> --- a/lib/support/cache.c
> +++ b/lib/support/cache.c
> @@ -15,6 +15,12 @@
> #include <stdint.h>
> #include <errno.h>
>
> +#ifdef __GNUC__
> +#define EXT2FS_ATTR(x) __attribute__(x)
> +#else
> +#define EXT2FS_ATTR(x)
> +#endif
> +
> #include "config.h"
> #include "list.h"
> #include "cache.h"
> @@ -711,7 +717,7 @@ cache_node_put(
>
> void
> cache_node_set_priority(
> - struct cache * cache,
> + struct cache * cache EXT2FS_ATTR((unused)),
> struct cache_node * node,
> int priority)
> {
> @@ -820,7 +826,7 @@ cache_flush(
> {
> struct cache_hash *hash;
> struct cache_node *node;
> - int i;
> + unsigned int i;
> bool still_dirty = false;
>
> if (!cache->flush)
> @@ -848,7 +854,7 @@ cache_report(
> const char *name,
> struct cache *cache)
> {
> - int i;
> + unsigned int i;
> unsigned long count, index, total;
> unsigned long hash_bucket_lengths[HASH_REPORT + 2] = { 0 };
>
> diff --git a/lib/support/cache.h b/lib/support/cache.h
> index 71fb9762f..cd0e8c20e 100644
> --- a/lib/support/cache.h
> +++ b/lib/support/cache.h
> @@ -83,7 +83,7 @@ typedef unsigned int (*cache_node_resize_t)(const struct cache *c,
> unsigned int curr_size,
> int dir);
>
> -static inline unsigned int cache_gradual_resize(const struct cache *cache,
> +static inline unsigned int cache_gradual_resize(const struct cache *cache EXT2FS_ATTR((unused)),
> unsigned int curr_size,
> int dir)
> {
> diff --git a/lib/support/list.h b/lib/support/list.h
> index 0ec8de525..54e8e2360 100644
> --- a/lib/support/list.h
> +++ b/lib/support/list.h
> @@ -148,13 +148,13 @@ static __always_inline bool __list_del_entry_valid(struct list_head *entry)
> return ret;
> }
> #else
> -static inline bool __list_add_valid(struct list_head *new,
> - struct list_head *prev,
> - struct list_head *next)
> +static inline bool __list_add_valid(struct list_head *new EXT2FS_ATTR((unused)),
> + struct list_head *prev EXT2FS_ATTR((unused)),
> + struct list_head *next EXT2FS_ATTR((unused)))
> {
> return true;
> }
> -static inline bool __list_del_entry_valid(struct list_head *entry)
> +static inline bool __list_del_entry_valid(struct list_head *entry EXT2FS_ATTR((unused)))
> {
> return true;
> }
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 7/7] fuse2fs: fix uninitialized variable warnings
From: Darrick J. Wong @ 2026-05-05 0:26 UTC (permalink / raw)
To: Theodore Ts'o; +Cc: Ext4 Developers List
In-Reply-To: <20260504233301.2345652-8-tytso@mit.edu>
On Mon, May 04, 2026 at 07:33:01PM -0400, Theodore Ts'o wrote:
> Clang treats use of unitialized variables found by static analysis to
> be serious enough that it will emit them by default (even without
> -Wall). So let's fix them up.
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
I wonder why gcc doesn't complain about this??
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fuse4fs/fuse4fs.c | 8 ++++----
> 1 file changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
> index b883083f8..928473268 100644
> --- a/fuse4fs/fuse4fs.c
> +++ b/fuse4fs/fuse4fs.c
> @@ -2216,7 +2216,7 @@ static void op_mknod(fuse_req_t req, fuse_ino_t fino, const char *name,
> const struct fuse_ctx *ctxt = fuse_req_ctx(req);
> struct fuse4fs *ff = fuse4fs_get(req);
> ext2_filsys fs;
> - ext2_ino_t parent, child;
> + ext2_ino_t parent, child = 0;
> errcode_t err;
> int filetype;
> gid_t gid;
> @@ -2311,7 +2311,7 @@ static void op_mkdir(fuse_req_t req, fuse_ino_t fino, const char *name,
> const struct fuse_ctx *ctxt = fuse_req_ctx(req);
> struct fuse4fs *ff = fuse4fs_get(req);
> ext2_filsys fs;
> - ext2_ino_t parent, child;
> + ext2_ino_t parent, child = 0;
> errcode_t err;
> char *block;
> blk64_t blk;
> @@ -2990,7 +2990,7 @@ static void op_symlink(fuse_req_t req, const char *target, fuse_ino_t fino,
> const struct fuse_ctx *ctxt = fuse_req_ctx(req);
> struct fuse4fs *ff = fuse4fs_get(req);
> ext2_filsys fs;
> - ext2_ino_t parent, child;
> + ext2_ino_t parent, child = 0;
> errcode_t err;
> gid_t gid;
> int ret = 0;
> @@ -6042,7 +6042,7 @@ static int fuse4fs_main(struct fuse_args *args, struct fuse4fs *ff)
> struct fuse_cmdline_opts opts;
> struct fuse_session *se;
> struct fuse_loop_config *loop_config = NULL;
> - int ret;
> + int ret = 0;
>
> if (fuse_parse_cmdline(args, &opts) != 0) {
> ret = 1;
> --
> 2.53.0
>
>
^ permalink raw reply
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Theodore Tso @ 2026-05-05 7:21 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Ext4 Developers List
In-Reply-To: <20260505000831.GA1101423@frogsfrogsfrogs>
On Mon, May 04, 2026 at 05:08:31PM -0700, Darrick J. Wong wrote:
> Hm, curious. It's regrettable that I no longer have a Mac, and
> therefore can't really do much investigating. If you give
> -o default_permissions,allow_other , does that fix the problem?
> If that fixes it, then fuse2fs has a bug somewhere in its own
> permissions checking.
Nice catch! Yes, using default_permissions does fix things. So now I
need to figure out what changed between v1.47.4 and next, and what
default_permissions does. I notice that in kernel mode we enable
default_permissions?
Can you give me a suggestion at what I might try to look at next?
- Ted
^ permalink raw reply
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Darrick J. Wong @ 2026-05-05 15:58 UTC (permalink / raw)
To: Theodore Tso; +Cc: Ext4 Developers List
In-Reply-To: <20260505072144.GC16497@macsyma-wired.lan>
On Tue, May 05, 2026 at 09:21:44AM +0200, Theodore Tso wrote:
> On Mon, May 04, 2026 at 05:08:31PM -0700, Darrick J. Wong wrote:
> > Hm, curious. It's regrettable that I no longer have a Mac, and
> > therefore can't really do much investigating. If you give
> > -o default_permissions,allow_other , does that fix the problem?
> > If that fixes it, then fuse2fs has a bug somewhere in its own
> > permissions checking.
>
> Nice catch! Yes, using default_permissions does fix things. So now I
> need to figure out what changed between v1.47.4 and next, and what
> default_permissions does. I notice that in kernel mode we enable
> default_permissions?
Yes. Though the addition of allow_other and default_permissions was
exactly the same in 1.47.4.
> Can you give me a suggestion at what I might try to look at next?
Hrm. A bisect would be the best (but least conference-friendly) option
to narrow things down.
On Linux, adding "default_permissions" means that the kernel uses the
same permission checking code that it uses for in-kernel filesystems.
Without it, the fuse server is responsible for doing all permission
checking.
And now that I've tried it, there are bugs in fuse2fs' permission
checking (aka !default_permissions):
# fuse2fs /dev/sda /mnt -d -f -o iomap=0,allow_other
# mkdir /mnt/lost+found/frogs
# su - djwong
$ ls -d /mnt/ /mnt/lost+found/ /mnt/lost+found/frogs/
drwxr-xr-x 3 root root 4096 May 5 08:32 /mnt//
drwx------ 3 root root 16384 May 5 08:49 /mnt/lost+found//
drwxr-xr-x 2 root root 4096 May 5 08:49 /mnt/lost+found/frogs//
So I guess I'm still confused. I've wondered if fuse2fs should always
inject default_permissions?
--D
^ permalink raw reply
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Theodore Tso @ 2026-05-05 22:04 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Ext4 Developers List
In-Reply-To: <20260505155821.GI1101423@frogsfrogsfrogs>
[-- Attachment #1: Type: text/plain, Size: 991 bytes --]
On Tue, May 05, 2026 at 08:58:21AM -0700, Darrick J. Wong wrote:
> Hrm. A bisect would be the best (but least conference-friendly) option
> to narrow things down.
Well, a bisect was a bit painful because I had to cherry-pick patches
so I could get a successful build. But I did finally come up with
the guilty commit, and it's.... surprising at least to me:
commit b0bd58062bbf645942ab4f0aced3bb229f462dde
Author: Darrick J. Wong <djwong@kernel.org>
Date: Thu Aug 28 10:30:40 2025 -0700
fuse2fs: cache symlink targets in the kernel
Speed up symlinks by allowing the kernel to cache them.
Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
I don't know why it made a difference, but this attached patch appears
to fix the MacOS regression. Could this be a bug in MacFuse[1]? FWIW, I'm
running MacFuse 5.2.0_1 from MacPorts.
[1] https://macfuse.github.io
I could condition the patch so that we only avoid setting
FUSE_CAP_CACHE_SYMLINKS on MacOS? WDYT?
- Ted
[-- Attachment #2: 0001-Revert-fuse2fs-cache-symlink-targets-in-the-kernel.patch --]
[-- Type: text/plain, Size: 1085 bytes --]
From fb3d2fc975d17d97ed76b2ed76022462a3b329f1 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 5 May 2026 23:34:53 +0200
Subject: [PATCH] Revert "fuse2fs: cache symlink targets in the kernel"
This reverts commit b0bd58062bbf645942ab4f0aced3bb229f462dde.
This commit is apparently causing fuse2fs on MacOS to fail without "-o
default_permissions". It's not clear why, but it was determined using
a git bisect, and reverting the commit addresses the regression.
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
misc/fuse2fs.c | 3 ---
1 file changed, 3 deletions(-)
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c46cfc23..0f9cefa6 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -1593,9 +1593,6 @@ static void *op_init(struct fuse_conn_info *conn,
if (ff->acl)
fuse_set_feature_flag(conn, FUSE_CAP_POSIX_ACL);
#endif
-#ifdef FUSE_CAP_CACHE_SYMLINKS
- fuse_set_feature_flag(conn, FUSE_CAP_CACHE_SYMLINKS);
-#endif
#ifdef FUSE_CAP_NO_EXPORT_SUPPORT
fuse_set_feature_flag(conn, FUSE_CAP_NO_EXPORT_SUPPORT);
#endif
--
2.50.1 (Apple Git-155)
^ permalink raw reply related
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Darrick J. Wong @ 2026-05-05 22:56 UTC (permalink / raw)
To: Theodore Tso; +Cc: Ext4 Developers List
In-Reply-To: <20260505220441.GB49070@macsyma.local>
On Wed, May 06, 2026 at 12:04:41AM +0200, Theodore Tso wrote:
> On Tue, May 05, 2026 at 08:58:21AM -0700, Darrick J. Wong wrote:
> > Hrm. A bisect would be the best (but least conference-friendly) option
> > to narrow things down.
>
> Well, a bisect was a bit painful because I had to cherry-pick patches
> so I could get a successfull build. But I did finally come up with
> the guilty commit, and it's.... surprising at least to me:
>
> commit b0bd58062bbf645942ab4f0aced3bb229f462dde
> Author: Darrick J. Wong <djwong@kernel.org>
> Date: Thu Aug 28 10:30:40 2025 -0700
>
> fuse2fs: cache symlink targets in the kernel
>
> Speed up symlinks by allowing the kernel to cache them.
>
> Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
>
> I don't know why it made a difference, but this attached patch appears
> fix the MacOS regression. Could this be a bug in MacFuse[1]? FWIW, I'm
> running MacFuse 5.2.0_1 from MacPorts.
>
> [1] https://macfuse.github.io
>
> I could condition the patch so that we only avoid setting
> FUSE_CAP_CACHE_SYMLINKS on MacOS? WDYT?
I vomit a little in my mouth, because...
$ cd macfuse/Library-3
$ git grep FUSE_CAP_CACHE_SYMLINKS
<snip>
include/fuse_common.h:472:#define FUSE_CAP_CACHE_SYMLINKS (1UL << 23)
Ok, so fuse2fs passes want-flag 1<<23 into the kernel to turn on what it
*thinks* are cached symlinks.
[Editor's note: this email previously contained the following text]
Unfortunately, we both now hit a brick wall, because the macfuse
page says:
"This repository contains the source code of libfuse.dylib and
macFUSE.framework. The other components, e.g. the macFUSE kernel
extension, are closed-source."
Hence we have no way to find out what the kext thinks 1<<23 means.
There's talk of some sort of "fskit" replacement for the signed
kext, but I don't know where that is.
Actually, it's *MUCH* worse than that. Look at what
Library-3/include/fuse_kernel.h contains:
#ifdef __APPLE__
/*
* TODO(bf)
*
* Resolve conflict with vanilla API. As long as we don't support anything
* beyond 7.19 on the kernel-side this should not be an issue. We need to clean
* this up when moving to 7.20 or later.
*/
#define FUSE_DARWIN_ACCESS_EXT (1 << 23)
#define FUSE_DARWIN_THREAD_SAFE (1 << 24)
#define FUSE_DARWIN_RENAME_EXT ((1 << 25) | (1 << 26))
#define FUSE_DARWIN_FALLOCATE (1 << 27)
#define FUSE_DARWIN_CASE_INSENSITIVE (1 << 29)
#define FUSE_DARWIN_SETVOLNAME (1 << 30)
#endif
Then look up about 30 lines:
#define FUSE_CACHE_SYMLINKS (1 << 23)
#define FUSE_NO_OPENDIR_SUPPORT (1 << 24)
#define FUSE_EXPLICIT_INVAL_DATA (1 << 25)
#define FUSE_MAP_ALIGNMENT (1 << 26)
#define FUSE_SUBMOUNTS (1 << 27)
#define FUSE_HANDLE_KILLPRIV_V2 (1 << 28)
#define FUSE_SETXATTR_EXT (1 << 29)
#define FUSE_INIT_EXT (1 << 30)
FUSE kABI 7.20 added FUSE_AUTO_INVAL_DATA, which is bit 12. It looks to
me as though they decided to add their own MacOS-specific feature flags
at the end of the u32 want field. Then Linux FUSE added 11 more feature
flags, at which point they unthinkingly ported over FUSE_CACHE_SYMLINKS,
which collides with FUSE_DARWIN_ACCESS_EXT. Apparently nobody on the
macfuse end noticed, so on your machine you're getting whatever
"ACCESS_EXT" does.
$searchengine has zero hits for FUSE_DARWIN_ACCESS_EXT so who knows what
that actually does. But this sure is a messed up situation.
Does this work?
/* MacFUSE overlays feature bits with LinuxFUSE, this is fcked up */
#if defined(FUSE_CAP_CACHE_SYMLINKS) && !defined(FUSE_CACHE_SYMLINKS)
fuse_set_feature_flag(conn, FUSE_CAP_CACHE_SYMLINKS);
#endif
--D
> - Ted
> From fb3d2fc975d17d97ed76b2ed76022462a3b329f1 Mon Sep 17 00:00:00 2001
> From: Theodore Ts'o <tytso@mit.edu>
> Date: Tue, 5 May 2026 23:34:53 +0200
> Subject: [PATCH] Revert "fuse2fs: cache symlink targets in the kernel"
>
> This reverts commit b0bd58062bbf645942ab4f0aced3bb229f462dde.
>
> This commit is apparently causing fuse2fs on MacOS to fail without "-o
> default_permissions". It's not clear why, but it was determined using
> a git bisect, and reverting the commit addresses the regression.
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
> misc/fuse2fs.c | 3 ---
> 1 file changed, 3 deletions(-)
>
> diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
> index c46cfc23..0f9cefa6 100644
> --- a/misc/fuse2fs.c
> +++ b/misc/fuse2fs.c
> @@ -1593,9 +1593,6 @@ static void *op_init(struct fuse_conn_info *conn,
> if (ff->acl)
> fuse_set_feature_flag(conn, FUSE_CAP_POSIX_ACL);
> #endif
> -#ifdef FUSE_CAP_CACHE_SYMLINKS
> - fuse_set_feature_flag(conn, FUSE_CAP_CACHE_SYMLINKS);
> -#endif
> #ifdef FUSE_CAP_NO_EXPORT_SUPPORT
> fuse_set_feature_flag(conn, FUSE_CAP_NO_EXPORT_SUPPORT);
> #endif
> --
> 2.50.1 (Apple Git-155)
>
^ permalink raw reply
* Re: [PATCH] dept: update documentation function names to match implementation
From: Byungchul Park @ 2026-05-06 6:27 UTC (permalink / raw)
To: Yunseong Kim
Cc: bagasdotme, 2407018371, Dai.Ngo, Liam.Howlett, a.hindborg,
ada.coupriediaz, adilger.kernel, akpm, alex.gaynor,
alexander.shishkin, aliceryhl, amir73il, andi.shyti, andrii, anna,
arnd, ast, baolin.wang, bigeasy, bjorn3_gh, boqun.feng, bp,
brauner, broonie, bsegall, catalin.marinas, chenhuacai,
chris.p.wilson, christian.koenig, chuck.lever, cl, clrkwllms,
corbet, da.gomez, dakr, damien.lemoal, dan.j.williams,
daniel.vetter, dave.hansen, david, dennis, dietmar.eggemann,
djwong, dri-devel, duyuyang, dwmw, francesco, frederic, gary,
geert+renesas, geert, gregkh, guoweikang.kernel, gustavo,
gwan-gyeong.mun, hamohammed.sa, hannes, harry.yoo, hch,
her0gyugyu, hpa, jack, jglisse, jiangshanlai, jlayton,
joel.granados, joel, joelagnelf, johannes.berg, josef, josh,
jpoimboe, juri.lelli, kees, kernel-team, kernel_team,
kevin.brodsky, kristina.martsenko, lillian, linaro-mm-sig, link,
linux-arch, linux-arm-kernel, linux-block, linux-doc, linux-ext4,
linux-fsdevel, linux-i2c, linux-ide, linux-kernel, linux-media,
linux-mm, linux-modules, linux-nfs, linux-rt-devel, linux,
longman, lorenzo.stoakes, lossin, luto, mark.rutland, masahiroy,
mathieu.desnoyers, matthew.brost, max.byungchul.park, mcgrof,
melissa.srw, mgorman, mhocko, miguel.ojeda.sandonis, minchan,
mingo, mjguzik, neeraj.upadhyay, neil, neilb, netdev, ngupta,
ojeda, okorniev, oleg, paulmck, penberg, peterz, petr.pavlu,
qiang.zhang, rcu, richard.weiyang, rientjes, rodrigosiqueiramelo,
rostedt, rppt, rust-for-linux, samitolvanen, sashal, shakeel.butt,
sj, sumit.semwal, surenb, tglx, thomas.weissschuh, tim.c.chen, tj,
tmgross, tom, torvalds, trondmy, tytso, urezki, usamaarif642,
vbabka, vdavydov.dev, vincent.guittot, vschneid, wangfushuai,
wangkefeng.wang, will, willy, wsa+renesas, x86, yeoreum.yun, ysk,
yunseong.kim, yuzhao, ziy
In-Reply-To: <20260428162614.786365-2-yunseong.kim@est.tech>
On Tue, Apr 28, 2026 at 06:26:15PM +0200, Yunseong Kim wrote:
> Synchronize function names in the documentation with the actual
> implementation to fix naming inconsistencies.
Good catch! Thanks Yunseong. I will apply it on the top.
Byungchul
> Signed-off-by: Yunseong Kim <yunseong.kim@est.tech>
> ---
> Documentation/dev-tools/dept.rst | 2 +-
> Documentation/dev-tools/dept_api.rst | 2 +-
> 2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/Documentation/dev-tools/dept.rst b/Documentation/dev-tools/dept.rst
> index 333166464543..31b2fe629fab 100644
> --- a/Documentation/dev-tools/dept.rst
> +++ b/Documentation/dev-tools/dept.rst
> @@ -97,7 +97,7 @@ No. What about the following?
>
> mutex_lock A
> mutex_lock A <- DEADLOCK
> - wait_for_complete B <- DEADLOCK
> + wait_for_completion B <- DEADLOCK
> complete B
> mutex_unlock A
> mutex_unlock A
> diff --git a/Documentation/dev-tools/dept_api.rst b/Documentation/dev-tools/dept_api.rst
> index 409116a62849..74e7b1424ad5 100644
> --- a/Documentation/dev-tools/dept_api.rst
> +++ b/Documentation/dev-tools/dept_api.rst
> @@ -113,7 +113,7 @@ Do not use these APIs directly. The raw APIs of dept are:
> dept_stage_wait(map, key, ip, wait_func, time);
> dept_request_event_wait_commit();
> dept_clean_stage();
> - dept_stage_event(task, ip);
> + dept_ttwu_stage_wait(task, ip);
> dept_ecxt_enter(map, evt_flags, ip, ecxt_func, evt_func, sub_local);
> dept_ecxt_holding(map, evt_flags);
> dept_request_event(map, ext_wgen);
> --
> 2.53.0
^ permalink raw reply
* [PATCH] ext4: enable mballoc kunit tests for blocksize > PAGE_SIZE
From: Baokun Li @ 2026-05-06 7:59 UTC (permalink / raw)
To: linux-ext4
Cc: tytso, adilger.kernel, jack, yi.zhang, ojaswin, ritesh.list,
libaokun
With Large Block Size (LBS) support, ext4 can now use block sizes larger
than PAGE_SIZE. The mballoc kunit tests previously skipped three test
cases (test_mb_mark_used, test_mb_free_blocks, test_mb_mark_used_cost)
under this configuration because the buddy cache inode's folio mapping
order was never initialized in the test harness.
The real mount path configures s_min_folio_order and s_max_folio_order
in ext4_fill_super(), which allows ext4_set_inode_mapping_order() to
set up the correct folio order for the buddy cache inode. The kunit
test bypasses ext4_fill_super(), so the mapping order stayed at zero
and __filemap_get_folio() allocated order-0 folios too small for LBS.
Initialize s_min_folio_order and s_max_folio_order in mbt_init_sb_layout()
to mirror ext4_fill_super() behavior, enabling properly sized folio
allocations and removing the three blocksize > PAGE_SIZE skips.
Signed-off-by: Baokun Li <libaokun@linux.alibaba.com>
---
fs/ext4/mballoc-test.c | 14 ++------------
1 file changed, 2 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
index 90ed505fa4b1..04bc9f773d63 100644
--- a/fs/ext4/mballoc-test.c
+++ b/fs/ext4/mballoc-test.c
@@ -206,6 +206,8 @@ static void mbt_init_sb_layout(struct super_block *sb,
sbi->s_desc_per_block_bits =
sb->s_blocksize_bits - (fls(layout->desc_size) - 1);
sbi->s_desc_per_block = 1 << sbi->s_desc_per_block_bits;
+ sbi->s_min_folio_order = get_order(sb->s_blocksize);
+ sbi->s_max_folio_order = sbi->s_min_folio_order;
es->s_first_data_block = cpu_to_le32(0);
es->s_blocks_count_lo = cpu_to_le32(layout->blocks_per_group *
@@ -791,10 +793,6 @@ static void test_mb_mark_used(struct kunit *test)
struct test_range ranges[TEST_RANGE_COUNT];
int i;
- /* buddy cache assumes that each page contains at least one block */
- if (sb->s_blocksize > PAGE_SIZE)
- kunit_skip(test, "blocksize exceeds pagesize");
-
bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
buddy = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
@@ -858,10 +856,6 @@ static void test_mb_free_blocks(struct kunit *test)
int i;
struct test_range ranges[TEST_RANGE_COUNT];
- /* buddy cache assumes that each page contains at least one block */
- if (sb->s_blocksize > PAGE_SIZE)
- kunit_skip(test, "blocksize exceeds pagesize");
-
bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
buddy = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
@@ -905,10 +899,6 @@ static void test_mb_mark_used_cost(struct kunit *test)
int i, j;
unsigned long start, end, all = 0;
- /* buddy cache assumes that each page contains at least one block */
- if (sb->s_blocksize > PAGE_SIZE)
- kunit_skip(test, "blocksize exceeds pagesize");
-
ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
KUNIT_ASSERT_EQ(test, ret, 0);
--
2.43.7
^ permalink raw reply related
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Theodore Tso @ 2026-05-06 9:28 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Ext4 Developers List
In-Reply-To: <20260505225635.GT7765@frogsfrogsfrogs>
On Tue, May 05, 2026 at 03:56:35PM -0700, Darrick J. Wong wrote:
> FUSE kABI 7.20 added FUSE_AUTO_INVAL_DATA, which is bit 12. It looks to
> me as though they decided to add their own MacOS-specific feature flags
> at the end of the u32 want field. Then Linux FUSE added 11 more feature
> flags, at which point they unthinkingly ported over FUSE_CACHE_SYMLINKS,
> which collides with FUSE_DARWIN_ACCESS_EXT. Apparently nobody on the
> macfuse end noticed, so on your machine you're getting whatever
> "ACCESS_EXT" does.
Yeah....
What MacFuse needs to do is to steal some extra fields from struct
fuse_init_in and fuse_init_out for the darwin-specific capabilities.
It turns out it already has conn->{want,capable}_darwin, but there's
no way to pass it in and out of op_init....
#ifdef __APPLE__
/*
* TODO(bf)
*
* Resolve conflict with vanilla API. We need a separate field flags for
* Darwin-only flags. As long as we don't support anything beyond ABI
* version 7.19 on the kernel-side this should not be an issue, though.
* We need to clean this up when moving to 7.20 or later.
*/
if (se->conn.want_darwin & FUSE_DARWIN_CAP_ACCESS_EXT)
outargflags |= FUSE_DARWIN_ACCESS_EXT;
So I *guess* what MacFuse needs to do is to do something like:
struct fuse_init_in {
uint32_t major;
uint32_t minor;
uint32_t max_readahead;
uint32_t flags;
uint32_t flags2;
uint32_t unused[9];
uint32_t darwin_flags;
uint32_t darwin_flags2;
};
Am I right in understanding that fuse_*_{in,out} is private between
the OS's libfuse and OS's fuse driver or kernel extension, so
it's not disastrous for fuse_kernel.h for Mac and Linux to drift?
> Does this work?
>
> /* MacFUSE overlays feature bits with LinuxFUSE, this is fcked up */
> #if defined(FUSE_CAP_CACHE_SYMLINKS) && !defined(FUSE_CACHE_SYMLINKS)
> fuse_set_feature_flag(conn, FUSE_CAP_CACHE_SYMLINKS);
> #endif
What I'm thinking about doing is adding at the beginning of
fuse[24]fs.c:
#ifdef __APPLE__
/*
* Sigh.... MacFuse is overloading the top bits of the flags field of
* struct fuse_init_{out} so we have to avoid using these capability
* flags until this gets fixed in MacFUSE
*/
#undef FUSE_CACHE_SYMLINKS
#undef FUSE_NO_OPENDIR_SUPPORT
#undef FUSE_EXPLICIT_INVAL_DATA
#undef FUSE_MAP_ALIGNMENT
#undef FUSE_SUBMOUNTS
#undef FUSE_HANDLE_KILLPRIV_V2
#undef FUSE_SETXATTR_EXT
#undef FUSE_INIT_EXT
#endif
- Ted
^ permalink raw reply
* Re: [PATCH] ext4: enable mballoc kunit tests for blocksize > PAGE_SIZE
From: Jan Kara @ 2026-05-06 9:35 UTC (permalink / raw)
To: Baokun Li
Cc: linux-ext4, tytso, adilger.kernel, jack, yi.zhang, ojaswin,
ritesh.list
In-Reply-To: <20260506075900.3649944-1-libaokun@linux.alibaba.com>
On Wed 06-05-26 15:59:00, Baokun Li wrote:
> With Large Block Size (LBS) support, ext4 can now use block sizes larger
> than PAGE_SIZE. The mballoc kunit tests previously skipped three test
> cases (test_mb_mark_used, test_mb_free_blocks, test_mb_mark_used_cost)
> under this configuration because the buddy cache inode's folio mapping
> order was never initialized in the test harness.
>
> The real mount path configures s_min_folio_order and s_max_folio_order
> in ext4_fill_super(), which allows ext4_set_inode_mapping_order() to
> set up the correct folio order for the buddy cache inode. The kunit
> test bypasses ext4_fill_super(), so the mapping order stayed at zero
> and __filemap_get_folio() allocated order-0 folios too small for LBS.
>
> Initialize s_min_folio_order and s_max_folio_order in mbt_init_sb_layout()
> to mirror ext4_fill_super() behavior, enabling properly sized folio
> allocations and removing the three blocksize > PAGE_SIZE skips.
>
> Signed-off-by: Baokun Li <libaokun@linux.alibaba.com>
Looks sensible. Feel free to add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> fs/ext4/mballoc-test.c | 14 ++------------
> 1 file changed, 2 insertions(+), 12 deletions(-)
>
> diff --git a/fs/ext4/mballoc-test.c b/fs/ext4/mballoc-test.c
> index 90ed505fa4b1..04bc9f773d63 100644
> --- a/fs/ext4/mballoc-test.c
> +++ b/fs/ext4/mballoc-test.c
> @@ -206,6 +206,8 @@ static void mbt_init_sb_layout(struct super_block *sb,
> sbi->s_desc_per_block_bits =
> sb->s_blocksize_bits - (fls(layout->desc_size) - 1);
> sbi->s_desc_per_block = 1 << sbi->s_desc_per_block_bits;
> + sbi->s_min_folio_order = get_order(sb->s_blocksize);
> + sbi->s_max_folio_order = sbi->s_min_folio_order;
>
> es->s_first_data_block = cpu_to_le32(0);
> es->s_blocks_count_lo = cpu_to_le32(layout->blocks_per_group *
> @@ -791,10 +793,6 @@ static void test_mb_mark_used(struct kunit *test)
> struct test_range ranges[TEST_RANGE_COUNT];
> int i;
>
> - /* buddy cache assumes that each page contains at least one block */
> - if (sb->s_blocksize > PAGE_SIZE)
> - kunit_skip(test, "blocksize exceeds pagesize");
> -
> bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
> KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
> buddy = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
> @@ -858,10 +856,6 @@ static void test_mb_free_blocks(struct kunit *test)
> int i;
> struct test_range ranges[TEST_RANGE_COUNT];
>
> - /* buddy cache assumes that each page contains at least one block */
> - if (sb->s_blocksize > PAGE_SIZE)
> - kunit_skip(test, "blocksize exceeds pagesize");
> -
> bitmap = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
> KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bitmap);
> buddy = kunit_kzalloc(test, sb->s_blocksize, GFP_KERNEL);
> @@ -905,10 +899,6 @@ static void test_mb_mark_used_cost(struct kunit *test)
> int i, j;
> unsigned long start, end, all = 0;
>
> - /* buddy cache assumes that each page contains at least one block */
> - if (sb->s_blocksize > PAGE_SIZE)
> - kunit_skip(test, "blocksize exceeds pagesize");
> -
> ret = ext4_mb_load_buddy_test(sb, TEST_GOAL_GROUP, &e4b);
> KUNIT_ASSERT_EQ(test, ret, 0);
>
> --
> 2.43.7
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
^ permalink raw reply
* [RFC v1 0/1] buffer_head: fail fast on repeated reads after I/O errors
From: Diangang Li @ 2026-05-06 13:50 UTC (permalink / raw)
To: axboe, viro, brauner
Cc: linux-block, linux-ext4, linux-fsdevel, changfengnan, Diangang Li
From: Diangang Li <lidiangang@bytedance.com>
A production system reported hung tasks blocked for 300s+ in ext4
buffer_head paths. Hung task reports were accompanied by disk I/O errors,
but profiling showed that most individual reads completed (or failed)
within 10s, with the worst case around 60s.
At the same time, we observed a high rate of repeated reads to the same disk LBAs.
The repeated reads frequently showed seconds-level latency and ended with
I/O errors, e.g.:
[Tue Mar 24 14:16:24 2026] blk_update_request: I/O error, dev sdi,
sector 10704150288 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
[Tue Mar 24 14:16:25 2026] blk_update_request: I/O error, dev sdi,
sector 10704488160 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
[Tue Mar 24 14:16:26 2026] blk_update_request: I/O error, dev sdi,
sector 10704382912 op 0x0:(READ) flags 0x0 phys_seg 1 prio class 0
We also sampled repeated-LBA latency histograms on /dev/sdi and saw that
the same error-prone LBAs were re-submitted many times with ~1-4s latency:
LBA 10704488160 (count=22): 1-2s: 20, 2-4s: 2
LBA 10704382912 (count=21): 1-2s: 20, 2-4s: 1
LBA 10704150288 (count=21): 1-2s: 19, 2-4s: 2
Root cause
==========
buffer_head reads serialize I/O via BH_Lock. When one read fails, the
buffer remains !Uptodate. With multiple threads concurrently accessing
the same buffer_head, each waiter wakes up after the previous owner drops
BH_Lock, then submits the same read again and waits again. This makes the
latency grow linearly with the number of contending threads, leading to
300s+ hung tasks.
The failing I/Os are repeatedly issued to the same LBA. The observed 1s+
per-I/O latency is likely from device-side retry/error recovery. On SCSI
the driver typically retries reads several times (e.g. 5 retries in our
environment), so a single filesystem submission can easily accumulate 5s+
delay before failing. When multiple threads then re-submit the same
failing read and serialize on BH_Lock, the delay is amplified into 300s+
hung tasks.
Similar behavior exists for other devices (e.g. NVMe with multiple
internal retries).
Example hung stacks:
INFO: task toutiao.infra.t:3760933 blocked for more than 327 seconds.
Call Trace:
__schedule
io_schedule
__wait_on_bit_lock
bh_uptodate_or_lock
__read_extent_tree_block
ext4_find_extent
ext4_ext_map_blocks
ext4_map_blocks
ext4_getblk
ext4_bread
__ext4_read_dirblock
dx_probe
ext4_htree_fill_tree
ext4_readdir
iterate_dir
ksys_getdents64
INFO: task toutiao.infra.t:2724456 blocked for more than 327 seconds.
Call Trace:
__schedule
io_schedule
__wait_on_bit_lock
ext4_read_bh_lock
ext4_bread
__ext4_read_dirblock
htree_dirblock_to_tree
ext4_htree_fill_tree
ext4_readdir
iterate_dir
ksys_getdents64
This series follows an earlier ext4-only RFC and moves the policy to the
generic buffer_head path so other buffer_head users can opt in with the
same per-block-device knob.
Approach
========
Record non-readahead read failures on buffer_head (BH_Read_EIO +
b_err_timestamp). When a per-bdev retry window is configured, submit_bh()
will skip submitting another non-readahead read for a buffer_head that
already failed within the window and complete it immediately with failure.
Clear the state on successful read or rewrite so the buffer can recover
if the error is transient.
The timestamp is recorded on the first failure only, so repeated failures
do not extend the retry window. After the window expires, the next
non-readahead read is submitted normally and can discover that the device
or media has recovered.
The retry window is configured per block device:
/sys/block/<disk>/read_err_retry_sec
/sys/block/<disk>/<part>/read_err_retry_sec
The default value is 0, which keeps the current behavior: after a read
error, callers may keep retrying the same read. Set it to a non-zero
value to fail repeated non-readahead reads fast within the window.
Patch summary
=============
1) Add BH_Read_EIO and b_err_timestamp to buffer_head.
2) Track non-readahead read failures in the submit_bh() bio completion
path.
3) Add per-bdev read_err_retry_sec sysfs knobs for disks and partitions.
4) Fail repeated non-readahead submit_bh() reads fast within the
configured window, while leaving readahead and other bio users
unchanged.
Diangang Li (1):
buffer_head: fail fast on repeated reads after I/O errors
Documentation/ABI/stable/sysfs-block | 26 +++++++++++
block/genhd.c | 24 ++++++++++
block/partitions/core.c | 24 ++++++++++
fs/buffer.c | 65 ++++++++++++++++++++++++++++
include/linux/blk_types.h | 3 ++
include/linux/buffer_head.h | 10 +++++
6 files changed, 152 insertions(+)
--
2.39.5
^ permalink raw reply
* [RFC v1 1/1] buffer_head: fail fast on repeated reads after I/O errors
From: Diangang Li @ 2026-05-06 13:50 UTC (permalink / raw)
To: axboe, viro, brauner
Cc: linux-block, linux-ext4, linux-fsdevel, changfengnan, Diangang Li
In-Reply-To: <20260506135047.2670453-1-diangangli@gmail.com>
From: Diangang Li <lidiangang@bytedance.com>
A failed buffer_head read leaves the buffer !Uptodate. If multiple
threads hit that same buffer_head, they serialize on BH_Lock and each
one re-submits the same read after the previous owner drops the lock.
If the device is slow to return the error, this can turn one bad block
into long stalls and repeated slow I/O.
Trying to remember bad LBAs in the block layer or in drivers would need a generic
per-device table with lookup, eviction, and lifetime rules. For buffer_head
users, keep the failure state with the cached buffer_head instead.
Track non-readahead read I/O errors in buffer_head with a dedicated bit
and a failure timestamp. Update this state from the bio completion path.
Add an optional per-bdev retry window: within the window, non-readahead
submit_bh() reads complete immediately with failure for a buffer_head
that recently saw a non-readahead read error. A successful read or
rewrite clears the state.
The timestamp is recorded on the first error only, so repeated failures do
not extend the window. Once the window expires, the next read is submitted
normally and can discover that the device or media has recovered.
Configure per block device via sysfs:
/sys/block/<disk>/read_err_retry_sec
/sys/block/<disk>/<part>/read_err_retry_sec
Default is 0, preserving existing behavior. Disk and partition values are
independent, and values larger than MAX_JIFFY_OFFSET / HZ are rejected to
avoid jiffies overflow.
Link: https://lore.kernel.org/linux-ext4/20260325093349.630193-1-diangangli@gmail.com/
Signed-off-by: Diangang Li <lidiangang@bytedance.com>
---
Documentation/ABI/stable/sysfs-block | 26 +++++++++++
block/genhd.c | 24 ++++++++++
block/partitions/core.c | 24 ++++++++++
fs/buffer.c | 65 ++++++++++++++++++++++++++++
include/linux/blk_types.h | 3 ++
include/linux/buffer_head.h | 10 +++++
6 files changed, 152 insertions(+)
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 900b3fc4c72d0..b850f96fa048e 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -185,6 +185,32 @@ Description:
unsigned integer, but only "0" and "1" are valid values.
+What: /sys/block/<disk>/read_err_retry_sec
+What: /sys/block/<disk>/<partition>/read_err_retry_sec
+Date: May 2026
+Contact: linux-block@vger.kernel.org
+Description:
+ (RW) Configure the fail-fast window, in seconds, for repeated
+ buffer_head reads after read I/O errors.
+
+ The default value is 0, which disables the fail-fast behavior and
+ preserves the existing retry behavior. When this value is non-zero,
+ a buffer_head that has recently seen a non-readahead read I/O error
+ can fail another read immediately within the configured window,
+ instead of submitting another bio for the same buffer_head.
+
+ This only applies to buffer_head reads submitted through submit_bh().
+ It is not a generic block layer read retry policy, and it does not
+ affect direct I/O or non-buffer_head bio submissions.
+
+ Disk and partition attributes are independent. Setting the disk
+ attribute does not change the value for existing or future
+ partition block devices.
+
+ The maximum accepted value is MAX_JIFFY_OFFSET / HZ. Larger values
+ are rejected with -ERANGE.
+
+
What: /sys/block/<disk>/<partition>/alignment_offset
Date: April 2009
Contact: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/block/genhd.c b/block/genhd.c
index 7d6854fd28e95..302dce67d685c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1159,6 +1159,28 @@ static ssize_t partscan_show(struct device *dev,
return sysfs_emit(buf, "%u\n", disk_has_partscan(dev_to_disk(dev)));
}
+static ssize_t read_err_retry_sec_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lu\n",
+ READ_ONCE(dev_to_bdev(dev)->bd_read_err_retry_sec));
+}
+
+static ssize_t read_err_retry_sec_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long sec;
+
+ if (kstrtoul(buf, 0, &sec))
+ return -EINVAL;
+ if (sec > MAX_JIFFY_OFFSET / HZ)
+ return -ERANGE;
+
+ WRITE_ONCE(dev_to_bdev(dev)->bd_read_err_retry_sec, sec);
+ return count;
+}
+
static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
@@ -1173,6 +1195,7 @@ static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
static DEVICE_ATTR(diskseq, 0444, diskseq_show, NULL);
static DEVICE_ATTR(partscan, 0444, partscan_show, NULL);
+static DEVICE_ATTR_RW(read_err_retry_sec);
#ifdef CONFIG_FAIL_MAKE_REQUEST
ssize_t part_fail_show(struct device *dev,
@@ -1224,6 +1247,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_events_poll_msecs.attr,
&dev_attr_diskseq.attr,
&dev_attr_partscan.attr,
+ &dev_attr_read_err_retry_sec.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 5d5332ce586b6..62b4c2f70709f 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -205,6 +205,28 @@ static ssize_t part_discard_alignment_show(struct device *dev,
return sysfs_emit(buf, "%u\n", bdev_discard_alignment(dev_to_bdev(dev)));
}
+static ssize_t read_err_retry_sec_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%lu\n",
+ READ_ONCE(dev_to_bdev(dev)->bd_read_err_retry_sec));
+}
+
+static ssize_t read_err_retry_sec_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long sec;
+
+ if (kstrtoul(buf, 0, &sec))
+ return -EINVAL;
+ if (sec > MAX_JIFFY_OFFSET / HZ)
+ return -ERANGE;
+
+ WRITE_ONCE(dev_to_bdev(dev)->bd_read_err_retry_sec, sec);
+ return count;
+}
+
static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
static DEVICE_ATTR(start, 0444, part_start_show, NULL);
static DEVICE_ATTR(size, 0444, part_size_show, NULL);
@@ -213,6 +235,7 @@ static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
+static DEVICE_ATTR_RW(read_err_retry_sec);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
@@ -227,6 +250,7 @@ static struct attribute *part_attrs[] = {
&dev_attr_discard_alignment.attr,
&dev_attr_stat.attr,
&dev_attr_inflight.attr,
+ &dev_attr_read_err_retry_sec.attr,
#ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
#endif
diff --git a/fs/buffer.c b/fs/buffer.c
index b0b3792b1496e..2a28ab6a51f0e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -920,6 +920,7 @@ static sector_t folio_init_buffers(struct folio *folio,
bh->b_private = NULL;
bh->b_bdev = bdev;
bh->b_blocknr = block;
+ clear_buffer_read_io_error_state(bh);
if (uptodate)
set_buffer_uptodate(bh);
if (block < end_block)
@@ -1503,6 +1504,7 @@ static void discard_buffer(struct buffer_head * bh)
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
+ clear_buffer_read_io_error_state(bh);
b_state = READ_ONCE(bh->b_state);
do {
} while (!try_cmpxchg_relaxed(&bh->b_state, &b_state,
@@ -1997,6 +1999,7 @@ iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh,
bh->b_blocknr = (iomap->addr + offset - iomap->offset) >>
inode->i_blkbits;
set_buffer_mapped(bh);
+ clear_buffer_read_io_error_state(bh);
return 0;
default:
WARN_ON_ONCE(1);
@@ -2663,6 +2666,33 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
}
EXPORT_SYMBOL(generic_block_bmap);
+static void bh_update_io_error_state(struct buffer_head *bh, const struct bio *bio)
+{
+ const enum req_op op = bio_op(bio);
+
+ if (op != REQ_OP_READ && op != REQ_OP_WRITE)
+ return;
+
+ /*
+ * Track non-readahead read failures (timestamped) so submit_bh() can
+ * fail repeated reads fast. A successful read or rewrite clears the
+ * state.
+ */
+ if (!bio->bi_status) {
+ clear_buffer_read_io_error(bh);
+ bh->b_err_timestamp = 0;
+ return;
+ }
+
+ /* Record the first failure; don't extend the window on repeats. */
+ if (op != REQ_OP_READ || (bio->bi_opf & REQ_RAHEAD) ||
+ buffer_read_io_error(bh))
+ return;
+
+ set_buffer_read_io_error(bh);
+ bh->b_err_timestamp = jiffies;
+}
+
static void end_bio_bh_io_sync(struct bio *bio)
{
struct buffer_head *bh = bio->bi_private;
@@ -2670,10 +2700,37 @@ static void end_bio_bh_io_sync(struct bio *bio)
if (unlikely(bio_flagged(bio, BIO_QUIET)))
set_bit(BH_Quiet, &bh->b_state);
+ bh_update_io_error_state(bh, bio);
+
bh->b_end_io(bh, !bio->bi_status);
bio_put(bio);
}
+static bool bh_failfast_read(struct buffer_head *bh)
+{
+ unsigned long retry_sec = READ_ONCE(bh->b_bdev->bd_read_err_retry_sec);
+
+ if (!retry_sec || !buffer_read_io_error(bh))
+ return false;
+
+ /* No timestamp: treat as stale state and re-arm on the next failure. */
+ if (!bh->b_err_timestamp) {
+ clear_buffer_read_io_error(bh);
+ return false;
+ }
+
+ if (time_before(jiffies,
+ bh->b_err_timestamp + secs_to_jiffies(retry_sec))) {
+ test_set_buffer_req(bh);
+ bh->b_end_io(bh, 0);
+ return true;
+ }
+
+ clear_buffer_read_io_error(bh);
+ bh->b_err_timestamp = 0;
+ return false;
+}
+
static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh,
gfp_t gfp_mask)
{
@@ -2702,6 +2759,14 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
BUG_ON(buffer_delay(bh));
BUG_ON(buffer_unwritten(bh));
+ /*
+ * Fail fast for repeated non-readahead buffer_head reads after a recent
+ * I/O error. This avoids serializing many callers on BH_Lock while
+ * re-submitting the same failing read.
+ */
+ if (op == REQ_OP_READ && !(opf & REQ_RAHEAD) && bh_failfast_read(bh))
+ return;
+
/*
* Only clear out a write error when rewriting
*/
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 8808ee76e73c0..9437c471ee7d7 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -69,6 +69,9 @@ struct block_device {
atomic_t bd_fsfreeze_count; /* number of freeze requests */
struct mutex bd_fsfreeze_mutex; /* serialize freeze/thaw */
+ /* Seconds; 0 disables read fail-fast window for submit_bh(READ). */
+ unsigned long bd_read_err_retry_sec;
+
struct partition_meta_info *bd_meta_info;
int bd_writers;
#ifdef CONFIG_SECURITY
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index e4939e33b4b51..3ab36429f8f38 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -29,6 +29,7 @@ enum bh_state_bits {
BH_Delay, /* Buffer is not yet allocated on disk */
BH_Boundary, /* Block is followed by a discontiguity */
BH_Write_EIO, /* I/O error on write */
+ BH_Read_EIO, /* I/O error on read */
BH_Unwritten, /* Buffer is allocated on disk but not written */
BH_Quiet, /* Buffer Error Prinks to be quiet */
BH_Meta, /* Buffer contains metadata */
@@ -79,6 +80,7 @@ struct buffer_head {
spinlock_t b_uptodate_lock; /* Used by the first bh in a page, to
* serialise IO completion of other
* buffers in the page */
+ unsigned long b_err_timestamp; /* timestamp of last I/O error */
};
/*
@@ -132,11 +134,18 @@ BUFFER_FNS(Async_Write, async_write)
BUFFER_FNS(Delay, delay)
BUFFER_FNS(Boundary, boundary)
BUFFER_FNS(Write_EIO, write_io_error)
+BUFFER_FNS(Read_EIO, read_io_error)
BUFFER_FNS(Unwritten, unwritten)
BUFFER_FNS(Meta, meta)
BUFFER_FNS(Prio, prio)
BUFFER_FNS(Defer_Completion, defer_completion)
+static inline void clear_buffer_read_io_error_state(struct buffer_head *bh)
+{
+ clear_buffer_read_io_error(bh);
+ bh->b_err_timestamp = 0;
+}
+
static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
{
/*
@@ -411,6 +420,7 @@ map_bh(struct buffer_head *bh, struct super_block *sb, sector_t block)
bh->b_bdev = sb->s_bdev;
bh->b_blocknr = block;
bh->b_size = sb->s_blocksize;
+ clear_buffer_read_io_error_state(bh);
}
static inline void wait_on_buffer(struct buffer_head *bh)
--
2.39.5
^ permalink raw reply related
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Darrick J. Wong @ 2026-05-06 14:34 UTC (permalink / raw)
To: Theodore Tso; +Cc: Ext4 Developers List, fuse-devel
In-Reply-To: <20260506092858.GC49070@macsyma.local>
[cc fuse-devel]
TLDR for the fuse developers: Ted and I discovered a collision between
the upstream libfuse feature bits and the MacFUSE feature bits, which
causes macfuse to do the wrong thing if you try to enable symlink
pagecache.
https://lore.kernel.org/linux-ext4/20260505225635.GT7765@frogsfrogsfrogs/
On Wed, May 06, 2026 at 11:28:58AM +0200, Theodore Tso wrote:
> On Tue, May 05, 2026 at 03:56:35PM -0700, Darrick J. Wong wrote:
> > FUSE kABI 7.20 added FUSE_AUTO_INVAL_DATA, which is bit 12. It looks to
> > me as though they decided to add their own MacOS-specific feature flags
> > at the end of the u32 want field. Then Linux FUSE added 11 more feature
> > flags, at which point they unthinkingly ported over FUSE_CACHE_SYMLINKS,
> > which collides with FUSE_DARWIN_ACCESS_EXT. Apparently nobody on the
> > macfuse end noticed, so on your machine you're getting whatever
> > "ACCESS_EXT" does.
>
> Yeah....
>
> What MacFuse needs to do is to steal some extra fields from struct
> fuse_init_in and fuse_init_out for the darwin-specific capabilities.
> It turns out it already has conn->{want,capable}_darwin, but there's
> no way to pass it in and out of op_init....
>
> #ifdef __APPLE__
> /*
> * TODO(bf)
> *
> * Resolve conflict with vanilla API. We need a separate field flags for
> * Darwin-only flags. As long as we don't support anything beyond ABI
> * version 7.19 on the kernel-side this should not be an issue, though.
> * We need to clean this up when moving to 7.20 or later.
> */
> if (se->conn.want_darwin & FUSE_DARWIN_CAP_ACCESS_EXT)
> outargflags |= FUSE_DARWIN_ACCESS_EXT;
>
> So I *guess* what MacFuse needs to do is to do something like:
>
> struct fuse_init_in {
> uint32_t major;
> uint32_t minor;
> uint32_t max_readahead;
> uint32_t flags;
> uint32_t flags2;
> uint32_t unused[9];
> uint32_t darwin_flags;
> uint32_t darwin_flags2;
> };
>
> Am I right in understanding that fuse_*_{in,out} is private between
> the OS's libfuse and OS's fuse driver or kernel extension, so
> it's not disastrous for fuse_kernel.h for Mac and Linux to drift?
Yeah, the easiest method would be to carve out the darwin_flags field.
If they still want to use flags/flags2, then they could probably still
fix it by redefining whichever feature was added later (probably
FUSE_CACHE_SYMLINKS) because right now they're broken, so it's not an
ABI break to redefine the symbol.
Either way someone will have to talk to the MacFUSE people about this.
> > Does this work?
> >
> > /* MacFUSE overlays feature bits with LinuxFUSE, this is fcked up */
> > #if defined(FUSE_CAP_CACHE_SYMLINKS) && !defined(FUSE_CACHE_SYMLINKS)
> > fuse_set_feature_flag(conn, FUSE_CAP_CACHE_SYMLINKS);
> > #endif
>
> What I'm thinking about doing is adding at the beginning of
> fuse[24]fs.c:
>
> #ifdef __APPLE__
> /*
> * Sigh.... MacFuse is overloading the top bits of the flags field of
> * struct fuse_init_{out} so we have to avoid using these capability
> * flags until this gets fixed in MacFUSE
> */
> #undef FUSE_CACHE_SYMLINKS
> #undef FUSE_NO_OPENDIR_SUPPORT
> #undef FUSE_EXPLICIT_INVAL_DATA
> #undef FUSE_MAP_ALIGNMENT
> #undef FUSE_SUBMOUNTS
> #undef FUSE_HANDLE_KILLPRIV_V2
> #undef FUSE_SETXATTR_EXT
> #undef FUSE_INIT_EXT
> #endif
That works for now. If MacFUSE fixes this on their end, then I guess we
could turn that into a configure check.
--D
>
> - Ted
^ permalink raw reply
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Theodore Tso @ 2026-05-06 15:08 UTC (permalink / raw)
To: Darrick J. Wong; +Cc: Ext4 Developers List, fuse-devel
In-Reply-To: <20260506143413.GA2241589@frogsfrogsfrogs>
On Wed, May 06, 2026 at 07:34:13AM -0700, Darrick J. Wong wrote:
> [cc fuse-devel]
>
> TLDR for the fuse developers: Ted and I discovered a collision between
> the upstream libfuse feature bits and the MacFUSE feature bits, which
> causes macfuse to do the wrong thing if you try to enable symlink
> pagecache.
This is the patch for fuse2fs and fuse4fs in e2fsprogs which works
around the problem (tested on MacOS using macfuse 5.2.0_1 from
MacPorts). More details about why it was needed are in the commit
description.
- Ted
From 67f1ec55a1309abead16cad883e38b798a567191 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 6 May 2026 10:51:56 -0400
Subject: [PATCH] fuse2fs, fuse4fs: Fix MacFuse compatibility issue
Unfortunately, MacFuse is overloading the top bits of the flags field
in struct fuse_init_{out} for MacFuse-specific capability extensions.
As a result, an attempt to use FUSE_CAP_CACHE_SYMLINKS when linking
with the libfuse in MacPorts will end up enabling
FUSE_DARWIN_CAP_ACCESS_EXT with MacFuse. Hilarity then ensues, with
all non-privileged access failing with permission denied.
The change which is needed in MacFuse is described in a TODO(bf)
statement:
https://github.com/macfuse/library/blob/ddb630db8327a50b6670ef5e4f5e6da82a549e99/lib/fuse_lowlevel.c#L3415
I plan to submit a bug report to MacFuse, but in the meantime, work around
the problem by disabling the overloaded capability flags on MacOS.
Link: https://lore.kernel.org/r/20260505225635.GT7765@frogsfrogsfrogs
Link: https://lore.kernel.org/r/20260506092858.GC49070@macsyma.local
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
fuse4fs/fuse4fs.c | 18 ++++++++++++++++++
misc/fuse2fs.c | 18 ++++++++++++++++++
2 files changed, 36 insertions(+)
diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
index 92847326..2739da92 100644
--- a/fuse4fs/fuse4fs.c
+++ b/fuse4fs/fuse4fs.c
@@ -120,6 +120,24 @@
#endif
#endif /* !defined(ENODATA) */
+#ifdef __APPLE__
+/*
+ * Sigh.... MacFuse is overloading the top bits of the flags field in
+ * struct fuse_init_{out} for MacFuse-specific capability extensions.
+ * Avoid using these fuse3 capability flags until this gets fixed in
+ * MacFUSE
+ */
+#undef FUSE_CAP_CACHE_SYMLINKS
+#undef FUSE_CAP_NO_OPENDIR_SUPPORT
+#undef FUSE_CAP_EXPLICIT_INVAL_DATA
+#undef FUSE_CAP_EXPIRE_ONLY
+#undef FUSE_CAP_SETXATTR_EXT
+#undef FUSE_CAP_DIRECT_IO_ALLOW_MMAP
+#undef FUSE_CAP_PASSTHROUGH
+#undef FUSE_CAP_NO_EXPORT_SUPPORT
+#undef FUSE_CAP_OVER_IO_URING
+#endif
+
#define FUSE4FS_ATTR_TIMEOUT (0.0)
static inline uint64_t round_up(uint64_t b, unsigned int align)
diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
index c46cfc23..0f2a3c35 100644
--- a/misc/fuse2fs.c
+++ b/misc/fuse2fs.c
@@ -116,6 +116,24 @@
#endif
#endif /* !defined(ENODATA) */
+#ifdef __APPLE__
+/*
+ * Sigh.... MacFuse is overloading the top bits of the flags field in
+ * struct fuse_init_{out} for MacFuse-specific capability extensions.
+ * Avoid using these fuse3 capability flags until this gets fixed in
+ * MacFUSE
+ */
+#undef FUSE_CAP_CACHE_SYMLINKS
+#undef FUSE_CAP_NO_OPENDIR_SUPPORT
+#undef FUSE_CAP_EXPLICIT_INVAL_DATA
+#undef FUSE_CAP_EXPIRE_ONLY
+#undef FUSE_CAP_SETXATTR_EXT
+#undef FUSE_CAP_DIRECT_IO_ALLOW_MMAP
+#undef FUSE_CAP_PASSTHROUGH
+#undef FUSE_CAP_NO_EXPORT_SUPPORT
+#undef FUSE_CAP_OVER_IO_URING
+#endif
+
static inline uint64_t round_up(uint64_t b, unsigned int align)
{
unsigned int m;
--
2.50.1 (Apple Git-155)
^ permalink raw reply related
* Re: [PATCH 0/7] fix up issues from djwong/fuse4fs-fork
From: Darrick J. Wong @ 2026-05-06 16:39 UTC (permalink / raw)
To: Theodore Tso; +Cc: Ext4 Developers List, fuse-devel
In-Reply-To: <20260506150833.GD49070@macsyma.local>
On Wed, May 06, 2026 at 05:08:33PM +0200, Theodore Tso wrote:
> On Wed, May 06, 2026 at 07:34:13AM -0700, Darrick J. Wong wrote:
> > [cc fuse-devel]
> >
> > TLDR for the fuse developers: Ted and I discovered a collision between
> > the upstream libfuse feature bits and the MacFUSE feature bits, which
> > causes macfuse to do the wrong thing if you try to enable symlink
> > pagecache.
>
> This is the patch for fuse2fs and fuse4fs in e2fsprogs which works
> around the problem (tested on MacOS using macfuse 5.2.0_1 from
> MacPorts). More details about why it was needed are in the commit
> description.
>
> - Ted
>
> From 67f1ec55a1309abead16cad883e38b798a567191 Mon Sep 17 00:00:00 2001
> From: Theodore Ts'o <tytso@mit.edu>
> Date: Wed, 6 May 2026 10:51:56 -0400
> Subject: [PATCH] fuse2fs, fuse4fs: Fix MacFuse compatibility issue
>
> Unfortunately, MacFuse is overloading the top bits of the flags field
> in struct fuse_init_{out} for MacFuse-specific capability extensions.
> As a result, an attempt to use FUSE_CAP_CACHE_SYMLINKS when linking
> with the libfuse in MacPorts will end up enabling
> FUSE_DARWIN_CAP_ACCESS_EXT with MacFuse. Hilarity then ensues, with
> all non-privileged access failing with permission denied.
>
> The change which is needed in MacFuse is described in a TODO(bf)
> statement:
>
> https://github.com/macfuse/library/blob/ddb630db8327a50b6670ef5e4f5e6da82a549e99/lib/fuse_lowlevel.c#L3415
>
> I plan to submit a bug report to MacFuse, but in the meantime, work around
> the problem by disabling the overloaded capability flags on MacOS.
>
> Link: https://lore.kernel.org/r/20260505225635.GT7765@frogsfrogsfrogs
> Link: https://lore.kernel.org/r/20260506092858.GC49070@macsyma.local
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Looks fine for now...
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
--D
> ---
> fuse4fs/fuse4fs.c | 18 ++++++++++++++++++
> misc/fuse2fs.c | 18 ++++++++++++++++++
> 2 files changed, 36 insertions(+)
>
> diff --git a/fuse4fs/fuse4fs.c b/fuse4fs/fuse4fs.c
> index 92847326..2739da92 100644
> --- a/fuse4fs/fuse4fs.c
> +++ b/fuse4fs/fuse4fs.c
> @@ -120,6 +120,24 @@
> #endif
> #endif /* !defined(ENODATA) */
>
> +#ifdef __APPLE__
> +/*
> + * Sigh.... MacFuse is overloading the top bits of the flags field in
> + * struct fuse_init_{out} for MacFuse-specific capability extensions.
> + * Avoid using these fuse3 capability flags until this gets fixed in
> + * MacFUSE
> + */
> +#undef FUSE_CAP_CACHE_SYMLINKS
> +#undef FUSE_CAP_NO_OPENDIR_SUPPORT
> +#undef FUSE_CAP_EXPLICIT_INVAL_DATA
> +#undef FUSE_CAP_EXPIRE_ONLY
> +#undef FUSE_CAP_SETXATTR_EXT
> +#undef FUSE_CAP_DIRECT_IO_ALLOW_MMAP
> +#undef FUSE_CAP_PASSTHROUGH
> +#undef FUSE_CAP_NO_EXPORT_SUPPORT
> +#undef FUSE_CAP_OVER_IO_URING
> +#endif
> +
> #define FUSE4FS_ATTR_TIMEOUT (0.0)
>
> static inline uint64_t round_up(uint64_t b, unsigned int align)
> diff --git a/misc/fuse2fs.c b/misc/fuse2fs.c
> index c46cfc23..0f2a3c35 100644
> --- a/misc/fuse2fs.c
> +++ b/misc/fuse2fs.c
> @@ -116,6 +116,24 @@
> #endif
> #endif /* !defined(ENODATA) */
>
> +#ifdef __APPLE__
> +/*
> + * Sigh.... MacFuse is overloading the top bits of the flags field in
> + * struct fuse_init_{out} for MacFuse-specific capability extensions.
> + * Avoid using these fuse3 capability flags until this gets fixed in
> + * MacFUSE
> + */
> +#undef FUSE_CAP_CACHE_SYMLINKS
> +#undef FUSE_CAP_NO_OPENDIR_SUPPORT
> +#undef FUSE_CAP_EXPLICIT_INVAL_DATA
> +#undef FUSE_CAP_EXPIRE_ONLY
> +#undef FUSE_CAP_SETXATTR_EXT
> +#undef FUSE_CAP_DIRECT_IO_ALLOW_MMAP
> +#undef FUSE_CAP_PASSTHROUGH
> +#undef FUSE_CAP_NO_EXPORT_SUPPORT
> +#undef FUSE_CAP_OVER_IO_URING
> +#endif
> +
> static inline uint64_t round_up(uint64_t b, unsigned int align)
> {
> unsigned int m;
> --
> 2.50.1 (Apple Git-155)
>
^ permalink raw reply
* [PATCH] jbd2: check for aborted handle in jbd2_journal_dirty_metadata()
From: Deepanshu Kartikey @ 2026-05-07 5:06 UTC (permalink / raw)
To: tytso, jack
Cc: linux-ext4, linux-kernel, Deepanshu Kartikey,
syzbot+98f651460e558a21baae
jbd2_journal_dirty_metadata() unconditionally dereferences
handle->h_transaction at function entry to obtain the journal pointer:
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
However, h_transaction may legitimately be NULL for an aborted handle.
The is_handle_aborted() helper in include/linux/jbd2.h explicitly
treats !h_transaction as one of the aborted states:
if (handle->h_aborted || !handle->h_transaction)
return 1;
Every other entry point in fs/jbd2/transaction.c
(jbd2_journal_get_{write,undo,create}_access, jbd2_journal_extend,
jbd2_journal_restart, jbd2_journal_stop, etc.) guards against this
with an is_handle_aborted() check before any dereference of
h_transaction. jbd2_journal_dirty_metadata() was missing this guard.
This is reachable from ocfs2's xattr code. ocfs2_xa_set() intentionally
falls through to ocfs2_xa_journal_dirty() even after
ocfs2_xa_prepare_entry() fails, on the assumption that the buffer
needs to be journaled to record any partial modifications (see the
comment above the out_dirty label in fs/ocfs2/xattr.c). If the failure
was caused by the journal being aborted -- e.g. an underlying I/O
error during a sub-operation such as __ocfs2_remove_xattr_range() --
the handle's h_transaction has been cleared by the abort path, and
the unconditional deref in jbd2_journal_dirty_metadata() becomes a
NULL deref.
Reproduced by syzbot with a crafted ocfs2 image where I/O against the
loop device backing the mount is sabotaged via LOOP_SET_STATUS64
between two setxattr() calls, causing the second setxattr (which
truncates an external xattr value) to abort the journal mid-flight:
Oops: general protection fault, probably for non-canonical
address 0xdffffc0000000000
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
RIP: jbd2_journal_dirty_metadata+0x4a/0xd30 fs/jbd2/transaction.c:1520
Call Trace:
ocfs2_journal_dirty+0x130/0x700 fs/ocfs2/journal.c:831
ocfs2_xa_journal_dirty fs/ocfs2/xattr.c:1483 [inline]
ocfs2_xa_set+0x15e3/0x2ec0 fs/ocfs2/xattr.c:2294
ocfs2_xattr_block_set+0x3e0/0x33c0 fs/ocfs2/xattr.c:3016
__ocfs2_xattr_set_handle+0x6b3/0xf50 fs/ocfs2/xattr.c:3418
ocfs2_xattr_set+0xf3f/0x13e0 fs/ocfs2/xattr.c:3681
__vfs_setxattr+0x43c/0x480 fs/xattr.c:218
...
Fix by adding the standard is_handle_aborted() guard at the top of
jbd2_journal_dirty_metadata() and returning -EROFS, matching the
pattern used by every other entry point in this file.
ocfs2_journal_dirty() already handles a non-zero return from
jbd2_journal_dirty_metadata() correctly.
Reported-by: syzbot+98f651460e558a21baae@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=98f651460e558a21baae
Tested-by: syzbot+98f651460e558a21baae@syzkaller.appspotmail.com
Signed-off-by: Deepanshu Kartikey <kartikey406@gmail.com>
---
fs/jbd2/transaction.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 4885903bbd10..aa0be9e9c876 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1516,14 +1516,19 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
*/
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
- transaction_t *transaction = handle->h_transaction;
- journal_t *journal = transaction->t_journal;
+ transaction_t *transaction;
+ journal_t *journal;
struct journal_head *jh;
int ret = 0;
+ if (is_handle_aborted(handle))
+ return -EROFS;
if (!buffer_jbd(bh))
return -EUCLEAN;
+ transaction = handle->h_transaction;
+ journal = transaction->t_journal;
+
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
--
2.43.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox