From: "Darrick J. Wong" <djwong@kernel.org>
To: miklos@szeredi.hu
Cc: joannelkoong@gmail.com, bpf@vger.kernel.org, bernd@bsbernd.com,
neal@gompa.dev, linux-fsdevel@vger.kernel.org,
linux-ext4@vger.kernel.org
Subject: Re: [PATCH 01/12] fuse: cache iomaps
Date: Fri, 27 Feb 2026 10:07:56 -0800 [thread overview]
Message-ID: <20260227180756.GK13829@frogsfrogsfrogs> (raw)
In-Reply-To: <177188736047.3937557.12141456226808645989.stgit@frogsfrogsfrogs>
On Mon, Feb 23, 2026 at 03:20:40PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
>
> Cache iomaps to a file so that we don't have to upcall the server.
>
> Signed-off-by: "Darrick J. Wong" <djwong@kernel.org>
> ---
> fs/fuse/fuse_i.h | 3
> fs/fuse/fuse_iomap_cache.h | 99 +++
> include/uapi/linux/fuse.h | 5
> fs/fuse/Makefile | 2
> fs/fuse/fuse_iomap.c | 5
> fs/fuse/fuse_iomap_cache.c | 1701 ++++++++++++++++++++++++++++++++++++++++++++
> fs/fuse/trace.c | 1
> 7 files changed, 1815 insertions(+), 1 deletion(-)
> create mode 100644 fs/fuse/fuse_iomap_cache.h
> create mode 100644 fs/fuse/fuse_iomap_cache.c
>
>
> diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
> index b4da866187ba2c..1ba95d1f430c3e 100644
> --- a/fs/fuse/fuse_i.h
> +++ b/fs/fuse/fuse_i.h
> @@ -197,6 +197,9 @@ struct fuse_inode {
> spinlock_t ioend_lock;
> struct work_struct ioend_work;
> struct list_head ioend_list;
> +
> + /* cached iomap mappings */
> + struct fuse_iomap_cache *cache;
> #endif
> };
>
> diff --git a/fs/fuse/fuse_iomap_cache.h b/fs/fuse/fuse_iomap_cache.h
> new file mode 100644
> index 00000000000000..922ca182357aa7
> --- /dev/null
> +++ b/fs/fuse/fuse_iomap_cache.h
> @@ -0,0 +1,99 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * The fuse_iext code comes from xfs_iext_tree.[ch] and is:
> + * Copyright (c) 2017 Christoph Hellwig.
> + *
> + * Everything else is:
> + * Copyright (C) 2025-2026 Oracle. All Rights Reserved.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + */
> +#ifndef _FS_FUSE_IOMAP_CACHE_H
> +#define _FS_FUSE_IOMAP_CACHE_H
> +
> +#if IS_ENABLED(CONFIG_FUSE_IOMAP)
> +/*
> + * File incore extent information, present for read and write mappings.
> + */
> +struct fuse_iext_root {
> + /* bytes in ir_data, or -1 if it has never been used */
> + int64_t ir_bytes;
> + void *ir_data; /* extent tree root */
> + unsigned int ir_height; /* height of the extent tree */
> +};
> +
> +struct fuse_iomap_cache {
> + struct fuse_iext_root ic_read;
> + struct fuse_iext_root ic_write;
> + uint64_t ic_seq; /* validity counter */
> + struct rw_semaphore ic_lock; /* mapping lock */
> + struct inode *ic_inode;
> +};
> +
> +void fuse_iomap_cache_lock(struct inode *inode);
> +void fuse_iomap_cache_unlock(struct inode *inode);
> +void fuse_iomap_cache_lock_shared(struct inode *inode);
> +void fuse_iomap_cache_unlock_shared(struct inode *inode);
> +
> +struct fuse_iext_leaf;
> +
> +struct fuse_iext_cursor {
> + struct fuse_iext_leaf *leaf;
> + int pos;
> +};
> +
> +#define FUSE_IEXT_LEFT_CONTIG (1u << 0)
> +#define FUSE_IEXT_RIGHT_CONTIG (1u << 1)
> +#define FUSE_IEXT_LEFT_FILLING (1u << 2)
> +#define FUSE_IEXT_RIGHT_FILLING (1u << 3)
> +#define FUSE_IEXT_LEFT_VALID (1u << 4)
> +#define FUSE_IEXT_RIGHT_VALID (1u << 5)
> +#define FUSE_IEXT_WRITE_MAPPING (1u << 6)
> +
> +bool fuse_iext_get_extent(const struct fuse_iext_root *ir,
> + const struct fuse_iext_cursor *cur,
> + struct fuse_iomap_io *gotp);
> +
> +static inline uint64_t fuse_iext_read_seq(struct fuse_iomap_cache *ic)
> +{
> + return (uint64_t)READ_ONCE(ic->ic_seq);
> +}
> +
> +static inline void fuse_iomap_cache_init(struct fuse_inode *fi)
> +{
> + fi->cache = NULL;
> +}
> +
> +static inline bool fuse_inode_caches_iomaps(const struct inode *inode)
> +{
> + const struct fuse_inode *fi = get_fuse_inode(inode);
> +
> + return fi->cache != NULL;
> +}
> +
> +int fuse_iomap_cache_alloc(struct inode *inode);
> +void fuse_iomap_cache_free(struct inode *inode);
> +
> +int fuse_iomap_cache_remove(struct inode *inode, enum fuse_iomap_iodir iodir,
> + loff_t off, uint64_t len);
> +
> +int fuse_iomap_cache_upsert(struct inode *inode, enum fuse_iomap_iodir iodir,
> + const struct fuse_iomap_io *map);
> +
> +enum fuse_iomap_lookup_result {
> + LOOKUP_HIT,
> + LOOKUP_MISS,
> + LOOKUP_NOFORK,
> +};
> +
> +struct fuse_iomap_lookup {
> + struct fuse_iomap_io map; /* cached mapping */
> + uint64_t validity_cookie; /* used with .iomap_valid() */
> +};
> +
> +enum fuse_iomap_lookup_result
> +fuse_iomap_cache_lookup(struct inode *inode, enum fuse_iomap_iodir iodir,
> + loff_t off, uint64_t len,
> + struct fuse_iomap_lookup *mval);
> +#endif /* CONFIG_FUSE_IOMAP */
> +
> +#endif /* _FS_FUSE_IOMAP_CACHE_H */
> diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
> index bf8514a5ee27af..a273838bc20f2f 100644
> --- a/include/uapi/linux/fuse.h
> +++ b/include/uapi/linux/fuse.h
> @@ -1394,6 +1394,8 @@ struct fuse_uring_cmd_req {
>
> /* fuse-specific mapping type indicating that writes use the read mapping */
> #define FUSE_IOMAP_TYPE_PURE_OVERWRITE (255)
> +/* fuse-specific mapping type saying the server has populated the cache */
> +#define FUSE_IOMAP_TYPE_RETRY_CACHE (254)
>
> #define FUSE_IOMAP_DEV_NULL (0U) /* null device cookie */
>
> @@ -1551,4 +1553,7 @@ struct fuse_iomap_dev_inval_out {
> struct fuse_range range;
> };
>
> +/* invalidate all cached iomap mappings up to EOF */
> +#define FUSE_IOMAP_INVAL_TO_EOF (~0ULL)
> +
> #endif /* _LINUX_FUSE_H */
> diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
> index 2536bc6a71b898..c672503da7bcbd 100644
> --- a/fs/fuse/Makefile
> +++ b/fs/fuse/Makefile
> @@ -18,6 +18,6 @@ fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o
> fuse-$(CONFIG_FUSE_BACKING) += backing.o
> fuse-$(CONFIG_SYSCTL) += sysctl.o
> fuse-$(CONFIG_FUSE_IO_URING) += dev_uring.o
> -fuse-$(CONFIG_FUSE_IOMAP) += fuse_iomap.o
> +fuse-$(CONFIG_FUSE_IOMAP) += fuse_iomap.o fuse_iomap_cache.o
>
> virtiofs-y := virtio_fs.o
> diff --git a/fs/fuse/fuse_iomap.c b/fs/fuse/fuse_iomap.c
> index 9599efcf1c2593..849ce1626c35fd 100644
> --- a/fs/fuse/fuse_iomap.c
> +++ b/fs/fuse/fuse_iomap.c
> @@ -14,6 +14,7 @@
> #include "fuse_iomap.h"
> #include "fuse_iomap_i.h"
> #include "fuse_dev_i.h"
> +#include "fuse_iomap_cache.h"
>
> static bool __read_mostly enable_iomap =
> #if IS_ENABLED(CONFIG_FUSE_IOMAP_BY_DEFAULT)
> @@ -1179,6 +1180,8 @@ void fuse_iomap_evict_inode(struct inode *inode)
>
> trace_fuse_iomap_evict_inode(inode);
>
> + if (fuse_inode_caches_iomaps(inode))
> + fuse_iomap_cache_free(inode);
> fuse_inode_clear_atomic(inode);
> fuse_inode_clear_iomap(inode);
> }
> @@ -1886,6 +1889,8 @@ static inline void fuse_inode_set_iomap(struct inode *inode)
> min_order = inode->i_blkbits - PAGE_SHIFT;
>
> mapping_set_folio_min_order(inode->i_mapping, min_order);
> +
> + fuse_iomap_cache_init(fi);
> set_bit(FUSE_I_IOMAP, &fi->state);
> }
>
> diff --git a/fs/fuse/fuse_iomap_cache.c b/fs/fuse/fuse_iomap_cache.c
> new file mode 100644
> index 00000000000000..e32de8a5e3c325
> --- /dev/null
> +++ b/fs/fuse/fuse_iomap_cache.c
> @@ -0,0 +1,1701 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fuse_iext* code adapted from xfs_iext_tree.c:
> + * Copyright (c) 2017 Christoph Hellwig.
> + *
> + * fuse_iomap_cache*lock* code adapted from xfs_inode.c:
> + * Copyright (c) 2000-2006 Silicon Graphics, Inc.
> + * All Rights Reserved.
> + *
> + * Copyright (C) 2025-2026 Oracle. All Rights Reserved.
> + * Author: Darrick J. Wong <djwong@kernel.org>
> + */
> +#include "fuse_i.h"
> +#include "fuse_trace.h"
> +#include "fuse_iomap_i.h"
> +#include "fuse_iomap.h"
> +#include "fuse_iomap_cache.h"
> +#include <linux/iomap.h>
> +
> +void fuse_iomap_cache_lock_shared(struct inode *inode)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> +
> + down_read(&ic->ic_lock);
> +}
> +
> +void fuse_iomap_cache_unlock_shared(struct inode *inode)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> +
> + up_read(&ic->ic_lock);
> +}
> +
> +void fuse_iomap_cache_lock(struct inode *inode)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> +
> + down_write(&ic->ic_lock);
> +}
> +
> +void fuse_iomap_cache_unlock(struct inode *inode)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> +
> + up_write(&ic->ic_lock);
> +}
> +
> +static inline void assert_cache_locked_shared(struct fuse_iomap_cache *ic)
> +{
> + rwsem_assert_held(&ic->ic_lock);
> +}
> +
> +static inline void assert_cache_locked(struct fuse_iomap_cache *ic)
> +{
> + rwsem_assert_held_write_nolockdep(&ic->ic_lock);
> +}
> +
> +/*
> + * In-core extent btree block layout:
> + *
> + * There are two types of blocks in the btree: leaf and inner (non-leaf) blocks.
> + *
> + * The leaf blocks are made up of %RECS_PER_LEAF extent records, each a
> + * struct fuse_iomap_io describing one cached mapping.  The records are
> + * followed by pointers to the previous and next leaf blocks (if there are
> + * any).
> + *
> + * The inner (non-leaf) blocks first contain KEYS_PER_NODE lookup keys, followed
> + * by an equal number of pointers to the btree blocks at the next lower level.
> + *
> + * +-------+-------+-------+-------+-------+----------+----------+
> + * Leaf: | rec 1 | rec 2 | rec 3 | rec 4 | rec N | prev-ptr | next-ptr |
> + * +-------+-------+-------+-------+-------+----------+----------+
> + *
> + * +-------+-------+-------+-------+-------+-------+------+-------+
> + * Inner: | key 1 | key 2 | key 3 | key N | ptr 1 | ptr 2 | ptr 3 | ptr N |
> + * +-------+-------+-------+-------+-------+-------+------+-------+
> + */
> +typedef uint64_t fuse_iext_key_t;
> +#define FUSE_IEXT_KEY_INVALID (1ULL << 63)
> +
> +enum {
> + NODE_SIZE = 256,
> + KEYS_PER_NODE = NODE_SIZE / (sizeof(fuse_iext_key_t) + sizeof(void *)),
> + RECS_PER_LEAF = (NODE_SIZE - (2 * sizeof(struct fuse_iext_leaf *))) /
> + sizeof(struct fuse_iomap_io),
> +};
> +
> +/* maximum length of a mapping that we're willing to cache */
> +#define FUSE_IOMAP_MAX_LEN ((loff_t)(1ULL << 63))
> +
> +struct fuse_iext_node {
> + fuse_iext_key_t keys[KEYS_PER_NODE];
> + void *ptrs[KEYS_PER_NODE];
> +};
> +
> +struct fuse_iext_leaf {
> + struct fuse_iomap_io recs[RECS_PER_LEAF];
> + struct fuse_iext_leaf *prev;
> + struct fuse_iext_leaf *next;
> +};
> +
> +static uint32_t
> +fuse_iomap_fork_to_state(const struct fuse_iomap_cache *ic,
> + const struct fuse_iext_root *ir)
> +{
> + ASSERT(ir == &ic->ic_write || ir == &ic->ic_read);
> +
> + if (ir == &ic->ic_write)
> + return FUSE_IEXT_WRITE_MAPPING;
> + return 0;
> +}
> +
> +/* Convert bmap state flags to an inode fork. */
> +static struct fuse_iext_root *
> +fuse_iext_state_to_fork(
> + struct fuse_iomap_cache *ic,
> + uint32_t state)
> +{
> + if (state & FUSE_IEXT_WRITE_MAPPING)
> + return &ic->ic_write;
> + return &ic->ic_read;
> +}
> +
> +/* The internal iext tree record is a struct fuse_iomap_io */
> +
> +static inline bool fuse_iext_rec_is_empty(const struct fuse_iomap_io *rec)
> +{
> + return rec->length == 0;
> +}
> +
> +static inline void fuse_iext_rec_clear(struct fuse_iomap_io *rec)
> +{
> + memset(rec, 0, sizeof(*rec));
> +}
> +
> +static inline void
> +fuse_iext_set(
> + struct fuse_iomap_io *rec,
> + const struct fuse_iomap_io *irec)
> +{
> + ASSERT(irec->length > 0);
> +
> + *rec = *irec;
> +}
> +
> +static inline void
> +fuse_iext_get(
> + struct fuse_iomap_io *irec,
> + const struct fuse_iomap_io *rec)
> +{
> + *irec = *rec;
> +}
> +
> +static inline uint64_t fuse_iext_count(const struct fuse_iext_root *ir)
> +{
> + return ir->ir_bytes / sizeof(struct fuse_iomap_io);
> +}
> +
> +static inline int fuse_iext_max_recs(const struct fuse_iext_root *ir)
> +{
> + if (ir->ir_height == 1)
> + return fuse_iext_count(ir);
> + return RECS_PER_LEAF;
> +}
> +
> +static inline struct fuse_iomap_io *cur_rec(const struct fuse_iext_cursor *cur)
> +{
> + return &cur->leaf->recs[cur->pos];
> +}
> +
> +static bool fuse_iext_valid(const struct fuse_iext_root *ir,
> + const struct fuse_iext_cursor *cur)
> +{
> + if (!cur->leaf)
> + return false;
> + if (cur->pos < 0 || cur->pos >= fuse_iext_max_recs(ir))
> + return false;
> + if (fuse_iext_rec_is_empty(cur_rec(cur)))
> + return false;
> + return true;
> +}
> +
> +static void *
> +fuse_iext_find_first_leaf(
> + struct fuse_iext_root *ir)
> +{
> + struct fuse_iext_node *node = ir->ir_data;
> + int height;
> +
> + if (!ir->ir_height)
> + return NULL;
> +
> + for (height = ir->ir_height; height > 1; height--) {
> + node = node->ptrs[0];
> + ASSERT(node);
> + }
> +
> + return node;
> +}
> +
> +static void *
> +fuse_iext_find_last_leaf(
> + struct fuse_iext_root *ir)
> +{
> + struct fuse_iext_node *node = ir->ir_data;
> + int height, i;
> +
> + if (!ir->ir_height)
> + return NULL;
> +
> + for (height = ir->ir_height; height > 1; height--) {
> + for (i = 1; i < KEYS_PER_NODE; i++)
> + if (!node->ptrs[i])
> + break;
> + node = node->ptrs[i - 1];
> + ASSERT(node);
> + }
> +
> + return node;
> +}
> +
> +static void
> +fuse_iext_first(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur)
> +{
> + cur->pos = 0;
> + cur->leaf = fuse_iext_find_first_leaf(ir);
> +}
> +
> +static void
> +fuse_iext_last(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur)
> +{
> + int i;
> +
> + cur->leaf = fuse_iext_find_last_leaf(ir);
> + if (!cur->leaf) {
> + cur->pos = 0;
> + return;
> + }
> +
> + for (i = 1; i < fuse_iext_max_recs(ir); i++) {
> + if (fuse_iext_rec_is_empty(&cur->leaf->recs[i]))
> + break;
> + }
> + cur->pos = i - 1;
> +}
> +
> +static void
> +fuse_iext_next(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur)
> +{
> + if (!cur->leaf) {
> + ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
> + fuse_iext_first(ir, cur);
> + return;
> + }
> +
> + ASSERT(cur->pos >= 0);
> + ASSERT(cur->pos < fuse_iext_max_recs(ir));
> +
> + cur->pos++;
> + if (ir->ir_height > 1 && !fuse_iext_valid(ir, cur) &&
> + cur->leaf->next) {
> + cur->leaf = cur->leaf->next;
> + cur->pos = 0;
> + }
> +}
> +
> +static void
> +fuse_iext_prev(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur)
> +{
> + if (!cur->leaf) {
> + ASSERT(cur->pos <= 0 || cur->pos >= RECS_PER_LEAF);
> + fuse_iext_last(ir, cur);
> + return;
> + }
> +
> + ASSERT(cur->pos >= 0);
> + ASSERT(cur->pos <= RECS_PER_LEAF);
> +
> +recurse:
> + do {
> + cur->pos--;
> + if (fuse_iext_valid(ir, cur))
> + return;
> + } while (cur->pos > 0);
> +
> + if (ir->ir_height > 1 && cur->leaf->prev) {
> + cur->leaf = cur->leaf->prev;
> + cur->pos = RECS_PER_LEAF;
> + goto recurse;
> + }
> +}
> +
> +/*
> + * Return true if the cursor points at an extent and return the extent structure
> + * in gotp. Else return false.
> + */
> +bool
> +fuse_iext_get_extent(
> + const struct fuse_iext_root *ir,
> + const struct fuse_iext_cursor *cur,
> + struct fuse_iomap_io *gotp)
> +{
> + if (!fuse_iext_valid(ir, cur))
> + return false;
> + fuse_iext_get(gotp, cur_rec(cur));
> + return true;
> +}
> +
> +static inline bool fuse_iext_next_extent(struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur, struct fuse_iomap_io *gotp)
> +{
> + fuse_iext_next(ir, cur);
> + return fuse_iext_get_extent(ir, cur, gotp);
> +}
> +
> +static inline bool fuse_iext_prev_extent(struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur, struct fuse_iomap_io *gotp)
> +{
> + fuse_iext_prev(ir, cur);
> + return fuse_iext_get_extent(ir, cur, gotp);
> +}
> +
> +/*
> + * Return the extent after cur in gotp without updating the cursor.
> + */
> +static inline bool fuse_iext_peek_next_extent(struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur, struct fuse_iomap_io *gotp)
> +{
> + struct fuse_iext_cursor ncur = *cur;
> +
> + fuse_iext_next(ir, &ncur);
> + return fuse_iext_get_extent(ir, &ncur, gotp);
> +}
> +
> +/*
> + * Return the extent before cur in gotp without updating the cursor.
> + */
> +static inline bool fuse_iext_peek_prev_extent(struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur, struct fuse_iomap_io *gotp)
> +{
> + struct fuse_iext_cursor ncur = *cur;
> +
> + fuse_iext_prev(ir, &ncur);
> + return fuse_iext_get_extent(ir, &ncur, gotp);
> +}
> +
> +static inline int
> +fuse_iext_key_cmp(
> + struct fuse_iext_node *node,
> + int n,
> + loff_t offset)
> +{
> + if (node->keys[n] > offset)
> + return 1;
> + if (node->keys[n] < offset)
> + return -1;
> + return 0;
> +}
> +
> +static inline int
> +fuse_iext_rec_cmp(
> + struct fuse_iomap_io *rec,
> + loff_t offset)
> +{
> + if (rec->offset > offset)
> + return 1;
> + if (rec->offset + rec->length <= offset)
> + return -1;
> + return 0;
> +}
> +
> +static void *
> +fuse_iext_find_level(
> + struct fuse_iext_root *ir,
> + loff_t offset,
> + int level)
> +{
> + struct fuse_iext_node *node = ir->ir_data;
> + int height, i;
> +
> + if (!ir->ir_height)
> + return NULL;
> +
> + for (height = ir->ir_height; height > level; height--) {
> + for (i = 1; i < KEYS_PER_NODE; i++)
> + if (fuse_iext_key_cmp(node, i, offset) > 0)
> + break;
> +
> + node = node->ptrs[i - 1];
> + if (!node)
> + break;
> + }
> +
> + return node;
> +}
> +
> +static int
> +fuse_iext_node_pos(
> + struct fuse_iext_node *node,
> + loff_t offset)
> +{
> + int i;
> +
> + for (i = 1; i < KEYS_PER_NODE; i++) {
> + if (fuse_iext_key_cmp(node, i, offset) > 0)
> + break;
> + }
> +
> + return i - 1;
> +}
> +
> +static int
> +fuse_iext_node_insert_pos(
> + struct fuse_iext_node *node,
> + loff_t offset)
> +{
> + int i;
> +
> + for (i = 0; i < KEYS_PER_NODE; i++) {
> + if (fuse_iext_key_cmp(node, i, offset) > 0)
> + return i;
> + }
> +
> + return KEYS_PER_NODE;
> +}
> +
> +static int
> +fuse_iext_node_nr_entries(
> + struct fuse_iext_node *node,
> + int start)
> +{
> + int i;
> +
> + for (i = start; i < KEYS_PER_NODE; i++) {
> + if (node->keys[i] == FUSE_IEXT_KEY_INVALID)
> + break;
> + }
> +
> + return i;
> +}
> +
> +static int
> +fuse_iext_leaf_nr_entries(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_leaf *leaf,
> + int start)
> +{
> + int i;
> +
> + for (i = start; i < fuse_iext_max_recs(ir); i++) {
> + if (fuse_iext_rec_is_empty(&leaf->recs[i]))
> + break;
> + }
> +
> + return i;
> +}
> +
> +static inline fuse_iext_key_t
> +fuse_iext_leaf_key(
> + struct fuse_iext_leaf *leaf,
> + int n)
> +{
> + return leaf->recs[n].offset;
> +}
> +
> +static inline void *
> +fuse_iext_alloc_node(
> + int size)
> +{
> + return kzalloc(size, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
> +}
> +
> +static void
> +fuse_iext_grow(
> + struct fuse_iext_root *ir)
> +{
> + struct fuse_iext_node *node = fuse_iext_alloc_node(NODE_SIZE);
> + int i;
> +
> + if (ir->ir_height == 1) {
> + struct fuse_iext_leaf *prev = ir->ir_data;
> +
> + node->keys[0] = fuse_iext_leaf_key(prev, 0);
> + node->ptrs[0] = prev;
> + } else {
> + struct fuse_iext_node *prev = ir->ir_data;
> +
> + ASSERT(ir->ir_height > 1);
> +
> + node->keys[0] = prev->keys[0];
> + node->ptrs[0] = prev;
> + }
> +
> + for (i = 1; i < KEYS_PER_NODE; i++)
> + node->keys[i] = FUSE_IEXT_KEY_INVALID;
> +
> + ir->ir_data = node;
> + ir->ir_height++;
> +}
> +
> +static void
> +fuse_iext_update_node(
> + struct fuse_iext_root *ir,
> + loff_t old_offset,
> + loff_t new_offset,
> + int level,
> + void *ptr)
> +{
> + struct fuse_iext_node *node = ir->ir_data;
> + int height, i;
> +
> + for (height = ir->ir_height; height > level; height--) {
> + for (i = 0; i < KEYS_PER_NODE; i++) {
> + if (i > 0 && fuse_iext_key_cmp(node, i, old_offset) > 0)
> + break;
> + if (node->keys[i] == old_offset)
> + node->keys[i] = new_offset;
> + }
> + node = node->ptrs[i - 1];
> + ASSERT(node);
> + }
> +
> + ASSERT(node == ptr);
> +}
> +
> +static struct fuse_iext_node *
> +fuse_iext_split_node(
> + struct fuse_iext_node **nodep,
> + int *pos,
> + int *nr_entries)
> +{
> + struct fuse_iext_node *node = *nodep;
> + struct fuse_iext_node *new = fuse_iext_alloc_node(NODE_SIZE);
> + const int nr_move = KEYS_PER_NODE / 2;
> + int nr_keep = nr_move + (KEYS_PER_NODE & 1);
> + int i = 0;
> +
> + /* for sequential append operations just spill over into the new node */
> + if (*pos == KEYS_PER_NODE) {
> + *nodep = new;
> + *pos = 0;
> + *nr_entries = 0;
> + goto done;
> + }
> +
> +
> + for (i = 0; i < nr_move; i++) {
> + new->keys[i] = node->keys[nr_keep + i];
> + new->ptrs[i] = node->ptrs[nr_keep + i];
> +
> + node->keys[nr_keep + i] = FUSE_IEXT_KEY_INVALID;
> + node->ptrs[nr_keep + i] = NULL;
> + }
> +
> + if (*pos >= nr_keep) {
> + *nodep = new;
> + *pos -= nr_keep;
> + *nr_entries = nr_move;
> + } else {
> + *nr_entries = nr_keep;
> + }
> +done:
> + for (; i < KEYS_PER_NODE; i++)
> + new->keys[i] = FUSE_IEXT_KEY_INVALID;
> + return new;
> +}
> +
> +static void
> +fuse_iext_insert_node(
> + struct fuse_iext_root *ir,
> + fuse_iext_key_t offset,
> + void *ptr,
> + int level)
> +{
> + struct fuse_iext_node *node, *new;
> + int i, pos, nr_entries;
> +
> +again:
> + if (ir->ir_height < level)
> + fuse_iext_grow(ir);
> +
> + new = NULL;
> + node = fuse_iext_find_level(ir, offset, level);
> + pos = fuse_iext_node_insert_pos(node, offset);
> + nr_entries = fuse_iext_node_nr_entries(node, pos);
> +
> + ASSERT(pos >= nr_entries || fuse_iext_key_cmp(node, pos, offset) != 0);
> + ASSERT(nr_entries <= KEYS_PER_NODE);
> +
> + if (nr_entries == KEYS_PER_NODE)
> + new = fuse_iext_split_node(&node, &pos, &nr_entries);
> +
> + /*
> + * Update the pointers in higher levels if the first entry changes
> + * in an existing node.
> + */
> + if (node != new && pos == 0 && nr_entries > 0)
> + fuse_iext_update_node(ir, node->keys[0], offset, level, node);
> +
> + for (i = nr_entries; i > pos; i--) {
> + node->keys[i] = node->keys[i - 1];
> + node->ptrs[i] = node->ptrs[i - 1];
> + }
> + node->keys[pos] = offset;
> + node->ptrs[pos] = ptr;
> +
> + if (new) {
> + offset = new->keys[0];
> + ptr = new;
> + level++;
> + goto again;
> + }
> +}
> +
> +static struct fuse_iext_leaf *
> +fuse_iext_split_leaf(
> + struct fuse_iext_cursor *cur,
> + int *nr_entries)
> +{
> + struct fuse_iext_leaf *leaf = cur->leaf;
> + struct fuse_iext_leaf *new = fuse_iext_alloc_node(NODE_SIZE);
> + const int nr_move = RECS_PER_LEAF / 2;
> + int nr_keep = nr_move + (RECS_PER_LEAF & 1);
> + int i;
> +
> + /* for sequential append operations just spill over into the new node */
> + if (cur->pos == RECS_PER_LEAF) {
> + cur->leaf = new;
> + cur->pos = 0;
> + *nr_entries = 0;
> + goto done;
> + }
> +
> + for (i = 0; i < nr_move; i++) {
> + new->recs[i] = leaf->recs[nr_keep + i];
> + fuse_iext_rec_clear(&leaf->recs[nr_keep + i]);
> + }
> +
> + if (cur->pos >= nr_keep) {
> + cur->leaf = new;
> + cur->pos -= nr_keep;
> + *nr_entries = nr_move;
> + } else {
> + *nr_entries = nr_keep;
> + }
> +done:
> + if (leaf->next)
> + leaf->next->prev = new;
> + new->next = leaf->next;
> + new->prev = leaf;
> + leaf->next = new;
> + return new;
> +}
> +
> +static void
> +fuse_iext_alloc_root(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur)
> +{
> + ASSERT(ir->ir_bytes == 0);
> +
> + ir->ir_data = fuse_iext_alloc_node(sizeof(struct fuse_iomap_io));
> + ir->ir_height = 1;
> +
> + /* now that we have a node step into it */
> + cur->leaf = ir->ir_data;
> + cur->pos = 0;
> +}
> +
> +static void
> +fuse_iext_realloc_root(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur)
> +{
> + int64_t new_size = ir->ir_bytes + sizeof(struct fuse_iomap_io);
> + void *new;
> +
> + /* account for the prev/next pointers */
> + if (new_size / sizeof(struct fuse_iomap_io) == RECS_PER_LEAF)
> + new_size = NODE_SIZE;
> +
> + new = krealloc(ir->ir_data, new_size,
> + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
> + memset(new + ir->ir_bytes, 0, new_size - ir->ir_bytes);
> + ir->ir_data = new;
> + cur->leaf = new;
> +}
> +
> +/*
> + * Increment the sequence counter on extent tree changes.  WRITE_ONCE avoids
> + * store tearing; it does not by itself order this update against the tree
> + * modifications (readers revalidate via the validity_cookie mechanism).
> + */
> +static inline void fuse_iext_inc_seq(struct fuse_iomap_cache *ic)
> +{
> + WRITE_ONCE(ic->ic_seq, READ_ONCE(ic->ic_seq) + 1);
> +}
> +
> +static void
> +fuse_iext_insert_raw(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur,
> + const struct fuse_iomap_io *irec)
> +{
> + loff_t offset = irec->offset;
> + struct fuse_iext_leaf *new = NULL;
> + int nr_entries, i;
> +
> + fuse_iext_inc_seq(ic);
> +
> + if (ir->ir_height == 0)
> + fuse_iext_alloc_root(ir, cur);
> + else if (ir->ir_height == 1)
> + fuse_iext_realloc_root(ir, cur);
> +
> + nr_entries = fuse_iext_leaf_nr_entries(ir, cur->leaf, cur->pos);
> + ASSERT(nr_entries <= RECS_PER_LEAF);
> + ASSERT(cur->pos >= nr_entries ||
> + fuse_iext_rec_cmp(cur_rec(cur), irec->offset) != 0);
> +
> + if (nr_entries == RECS_PER_LEAF)
> + new = fuse_iext_split_leaf(cur, &nr_entries);
> +
> + /*
> + * Update the pointers in higher levels if the first entry changes
> + * in an existing node.
> + */
> + if (cur->leaf != new && cur->pos == 0 && nr_entries > 0) {
> + fuse_iext_update_node(ir, fuse_iext_leaf_key(cur->leaf, 0),
> + offset, 1, cur->leaf);
> + }
> +
> + for (i = nr_entries; i > cur->pos; i--)
> + cur->leaf->recs[i] = cur->leaf->recs[i - 1];
> + fuse_iext_set(cur_rec(cur), irec);
> + ir->ir_bytes += sizeof(struct fuse_iomap_io);
> +
> + if (new)
> + fuse_iext_insert_node(ir, fuse_iext_leaf_key(new, 0), new, 2);
> +}
> +
> +static void
> +fuse_iext_insert(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_cursor *cur,
> + const struct fuse_iomap_io *irec,
> + uint32_t state)
> +{
> + struct fuse_iext_root *ir = fuse_iext_state_to_fork(ic, state);
> +
> + fuse_iext_insert_raw(ic, ir, cur, irec);
> +}
> +
> +static struct fuse_iext_node *
> +fuse_iext_rebalance_node(
> + struct fuse_iext_node *parent,
> + int *pos,
> + struct fuse_iext_node *node,
> + int nr_entries)
> +{
> + /*
> + * If the neighbouring nodes are completely full, or have different
> + * parents, we might never be able to merge our node, and will only
> + * delete it once the number of entries hits zero.
> + */
> + if (nr_entries == 0)
> + return node;
> +
> + if (*pos > 0) {
> + struct fuse_iext_node *prev = parent->ptrs[*pos - 1];
> + int nr_prev = fuse_iext_node_nr_entries(prev, 0), i;
> +
> + if (nr_prev + nr_entries <= KEYS_PER_NODE) {
> + for (i = 0; i < nr_entries; i++) {
> + prev->keys[nr_prev + i] = node->keys[i];
> + prev->ptrs[nr_prev + i] = node->ptrs[i];
> + }
> + return node;
> + }
> + }
> +
> + if (*pos + 1 < fuse_iext_node_nr_entries(parent, *pos)) {
> + struct fuse_iext_node *next = parent->ptrs[*pos + 1];
> + int nr_next = fuse_iext_node_nr_entries(next, 0), i;
> +
> + if (nr_entries + nr_next <= KEYS_PER_NODE) {
> + /*
> + * Merge the next node into this node so that we don't
> + * have to do an additional update of the keys in the
> + * higher levels.
> + */
> + for (i = 0; i < nr_next; i++) {
> + node->keys[nr_entries + i] = next->keys[i];
> + node->ptrs[nr_entries + i] = next->ptrs[i];
> + }
> +
> + ++*pos;
> + return next;
> + }
> + }
> +
> + return NULL;
> +}
> +
> +static void
> +fuse_iext_remove_node(
> + struct fuse_iext_root *ir,
> + loff_t offset,
> + void *victim)
> +{
> + struct fuse_iext_node *node, *parent;
> + int level = 2, pos, nr_entries, i;
> +
> + ASSERT(level <= ir->ir_height);
> + node = fuse_iext_find_level(ir, offset, level);
> + pos = fuse_iext_node_pos(node, offset);
> +again:
> + ASSERT(node->ptrs[pos]);
> + ASSERT(node->ptrs[pos] == victim);
> + kfree(victim);
> +
> + nr_entries = fuse_iext_node_nr_entries(node, pos) - 1;
> + offset = node->keys[0];
> + for (i = pos; i < nr_entries; i++) {
> + node->keys[i] = node->keys[i + 1];
> + node->ptrs[i] = node->ptrs[i + 1];
> + }
> + node->keys[nr_entries] = FUSE_IEXT_KEY_INVALID;
> + node->ptrs[nr_entries] = NULL;
> +
> + if (pos == 0 && nr_entries > 0) {
> + fuse_iext_update_node(ir, offset, node->keys[0], level, node);
> + offset = node->keys[0];
> + }
> +
> + if (nr_entries >= KEYS_PER_NODE / 2)
> + return;
> +
> + if (level < ir->ir_height) {
> + /*
> + * If we aren't at the root yet try to find a neighbour node to
> + * merge with (or delete the node if it is empty), and then
> + * recurse up to the next level.
> + */
> + level++;
> + parent = fuse_iext_find_level(ir, offset, level);
> + pos = fuse_iext_node_pos(parent, offset);
> +
> + ASSERT(pos != KEYS_PER_NODE);
> + ASSERT(parent->ptrs[pos] == node);
> +
> + node = fuse_iext_rebalance_node(parent, &pos, node, nr_entries);
> + if (node) {
> + victim = node;
> + node = parent;
> + goto again;
> + }
> + } else if (nr_entries == 1) {
> + /*
> + * If we are at the root and only one entry is left we can just
> + * free this node and update the root pointer.
> + */
> + ASSERT(node == ir->ir_data);
> + ir->ir_data = node->ptrs[0];
> + ir->ir_height--;
> + kfree(node);
> + }
> +}
> +
> +static void
> +fuse_iext_rebalance_leaf(
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *cur,
> + struct fuse_iext_leaf *leaf,
> + loff_t offset,
> + int nr_entries)
> +{
> + /*
> + * If the neighbouring nodes are completely full we might never be able
> + * to merge our node, and will only delete it once the number of
> + * entries hits zero.
> + */
> + if (nr_entries == 0)
> + goto remove_node;
> +
> + if (leaf->prev) {
> + int nr_prev = fuse_iext_leaf_nr_entries(ir, leaf->prev, 0), i;
> +
> + if (nr_prev + nr_entries <= RECS_PER_LEAF) {
> + for (i = 0; i < nr_entries; i++)
> + leaf->prev->recs[nr_prev + i] = leaf->recs[i];
> +
> + if (cur->leaf == leaf) {
> + cur->leaf = leaf->prev;
> + cur->pos += nr_prev;
> + }
> + goto remove_node;
> + }
> + }
> +
> + if (leaf->next) {
> + int nr_next = fuse_iext_leaf_nr_entries(ir, leaf->next, 0), i;
> +
> + if (nr_entries + nr_next <= RECS_PER_LEAF) {
> + /*
> + * Merge the next node into this node so that we don't
> + * have to do an additional update of the keys in the
> + * higher levels.
> + */
> + for (i = 0; i < nr_next; i++) {
> + leaf->recs[nr_entries + i] =
> + leaf->next->recs[i];
> + }
> +
> + if (cur->leaf == leaf->next) {
> + cur->leaf = leaf;
> + cur->pos += nr_entries;
> + }
> +
> + offset = fuse_iext_leaf_key(leaf->next, 0);
> + leaf = leaf->next;
> + goto remove_node;
> + }
> + }
> +
> + return;
> +remove_node:
> + if (leaf->prev)
> + leaf->prev->next = leaf->next;
> + if (leaf->next)
> + leaf->next->prev = leaf->prev;
> + fuse_iext_remove_node(ir, offset, leaf);
> +}
> +
> +static void
> +fuse_iext_free_last_leaf(
> + struct fuse_iext_root *ir)
> +{
> + ir->ir_height--;
> + kfree(ir->ir_data);
> + ir->ir_data = NULL;
> +}
> +
> +static void
> +fuse_iext_remove(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_cursor *cur,
> + uint32_t state)
> +{
> + struct fuse_iext_root *ir = fuse_iext_state_to_fork(ic, state);
> + struct fuse_iext_leaf *leaf = cur->leaf;
> + loff_t offset = fuse_iext_leaf_key(leaf, 0);
> + int i, nr_entries;
> +
> + ASSERT(ir->ir_height > 0);
> + ASSERT(ir->ir_data != NULL);
> + ASSERT(fuse_iext_valid(ir, cur));
> +
> + fuse_iext_inc_seq(ic);
> +
> + nr_entries = fuse_iext_leaf_nr_entries(ir, leaf, cur->pos) - 1;
> + for (i = cur->pos; i < nr_entries; i++)
> + leaf->recs[i] = leaf->recs[i + 1];
> + fuse_iext_rec_clear(&leaf->recs[nr_entries]);
> + ir->ir_bytes -= sizeof(struct fuse_iomap_io);
> +
> + if (cur->pos == 0 && nr_entries > 0) {
> + fuse_iext_update_node(ir, offset, fuse_iext_leaf_key(leaf, 0), 1,
> + leaf);
> + offset = fuse_iext_leaf_key(leaf, 0);
> + } else if (cur->pos == nr_entries) {
> + if (ir->ir_height > 1 && leaf->next)
> + cur->leaf = leaf->next;
> + else
> + cur->leaf = NULL;
> + cur->pos = 0;
> + }
> +
> + if (nr_entries >= RECS_PER_LEAF / 2)
> + return;
> +
> + if (ir->ir_height > 1)
> + fuse_iext_rebalance_leaf(ir, cur, leaf, offset, nr_entries);
> + else if (nr_entries == 0)
> + fuse_iext_free_last_leaf(ir);
> +}
> +
> +/*
> + * Lookup the extent covering offset.
> + *
> + * If there is an extent covering offset return the extent index, and store the
> + * expanded extent structure in *gotp, and the extent cursor in *cur.
> + * If there is no extent covering offset, but there is an extent after it (e.g.
> + * it lies in a hole) return that extent in *gotp and its cursor in *cur
> + * instead.
> + * If offset is beyond the last extent return false, and return an invalid
> + * cursor value.
> + */
> +static bool
> +fuse_iext_lookup_extent(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_root *ir,
> + loff_t offset,
> + struct fuse_iext_cursor *cur,
> + struct fuse_iomap_io *gotp)
> +{
> + cur->leaf = fuse_iext_find_level(ir, offset, 1);
> + if (!cur->leaf) {
> + cur->pos = 0;
> + return false;
> + }
> +
> + for (cur->pos = 0; cur->pos < fuse_iext_max_recs(ir); cur->pos++) {
> + struct fuse_iomap_io *rec = cur_rec(cur);
> +
> + if (fuse_iext_rec_is_empty(rec))
> + break;
> + if (fuse_iext_rec_cmp(rec, offset) >= 0)
> + goto found;
> + }
> +
> + /* Try looking in the next node for an entry > offset */
> + if (ir->ir_height == 1 || !cur->leaf->next)
> + return false;
> + cur->leaf = cur->leaf->next;
> + cur->pos = 0;
> + if (!fuse_iext_valid(ir, cur))
> + return false;
> +found:
> + fuse_iext_get(gotp, cur_rec(cur));
> + return true;
> +}
> +
> +/*
> + * Returns the last extent before end, and if this extent doesn't cover
> + * end, update end to the end of the extent.
> + */
> +static bool
> +fuse_iext_lookup_extent_before(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_root *ir,
> + loff_t *end,
> + struct fuse_iext_cursor *cur,
> + struct fuse_iomap_io *gotp)
> +{
> + /* could be optimized to not even look up the next on a match.. */
> + if (fuse_iext_lookup_extent(ic, ir, *end - 1, cur, gotp) &&
> + gotp->offset <= *end - 1)
> + return true;
> + if (!fuse_iext_prev_extent(ir, cur, gotp))
> + return false;
> + *end = gotp->offset + gotp->length;
> + return true;
> +}
> +
> +static void
> +fuse_iext_update_extent(
> + struct fuse_iomap_cache *ic,
> + uint32_t state,
> + struct fuse_iext_cursor *cur,
> + struct fuse_iomap_io *new)
> +{
> + struct fuse_iext_root *ir = fuse_iext_state_to_fork(ic, state);
> +
> + fuse_iext_inc_seq(ic);
> +
> + if (cur->pos == 0) {
> + struct fuse_iomap_io old;
> +
> + fuse_iext_get(&old, cur_rec(cur));
> + if (new->offset != old.offset) {
> + fuse_iext_update_node(ir, old.offset,
> + new->offset, 1, cur->leaf);
> + }
> + }
> +
> + fuse_iext_set(cur_rec(cur), new);
> +}
> +
> +/*
> + * This is a recursive function, because of that we need to be extremely
> + * careful with stack usage.
> + */
> +static void
> +fuse_iext_destroy_node(
> + struct fuse_iext_node *node,
> + int level)
> +{
> + int i;
> +
> + if (level > 1) {
> + for (i = 0; i < KEYS_PER_NODE; i++) {
> + if (node->keys[i] == FUSE_IEXT_KEY_INVALID)
> + break;
> + fuse_iext_destroy_node(node->ptrs[i], level - 1);
> + }
> + }
> +
> + kfree(node);
> +}
> +
> +static void
> +fuse_iext_destroy(
> + struct fuse_iext_root *ir)
> +{
> + fuse_iext_destroy_node(ir->ir_data, ir->ir_height);
> +
> + ir->ir_bytes = 0;
> + ir->ir_height = 0;
> + ir->ir_data = NULL;
> +}
> +
> +static inline struct fuse_iext_root *
> +fuse_iext_root_ptr(
> + struct fuse_iomap_cache *ic,
> + enum fuse_iomap_iodir iodir)
> +{
> + switch (iodir) {
> + case READ_MAPPING:
> + return &ic->ic_read;
> + case WRITE_MAPPING:
> + return &ic->ic_write;
> + default:
> + ASSERT(0);
> + return NULL;
> + }
> +}
> +
> +static inline bool fuse_iomap_addrs_adjacent(const struct fuse_iomap_io *left,
> + const struct fuse_iomap_io *right)
> +{
> + switch (left->type) {
> + case FUSE_IOMAP_TYPE_MAPPED:
> + case FUSE_IOMAP_TYPE_UNWRITTEN:
> + return left->addr + left->length == right->addr;
> + default:
> + return left->addr == FUSE_IOMAP_NULL_ADDR &&
> + right->addr == FUSE_IOMAP_NULL_ADDR;
> + }
> +}
> +
> +static inline bool fuse_iomap_can_merge(const struct fuse_iomap_io *left,
> + const struct fuse_iomap_io *right)
> +{
> + return (left->dev == right->dev &&
> + left->offset + left->length == right->offset &&
> + left->type == right->type &&
> + fuse_iomap_addrs_adjacent(left, right) &&
> + left->flags == right->flags &&
> + left->length + right->length <= FUSE_IOMAP_MAX_LEN);
> +}
> +
> +static inline bool fuse_iomap_can_merge3(const struct fuse_iomap_io *left,
> + const struct fuse_iomap_io *new,
> + const struct fuse_iomap_io *right)
> +{
> + return left->length + new->length + right->length <= FUSE_IOMAP_MAX_LEN;
> +}
> +
> +#if IS_ENABLED(CONFIG_FUSE_IOMAP_DEBUG)
> +static void fuse_iext_check_mappings(struct fuse_iomap_cache *ic,
> + struct fuse_iext_root *ir)
> +{
> + struct fuse_iext_cursor icur;
> + struct fuse_iomap_io prev, got;
> + struct inode *inode = ic->ic_inode;
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + unsigned long long nr = 0;
> +
> + if (ir->ir_bytes < 0 || !static_branch_unlikely(&fuse_iomap_debug))
> + return;
> +
> + fuse_iext_first(ir, &icur);
> + if (!fuse_iext_get_extent(ir, &icur, &prev))
> + return;
> + nr++;
> +
> + fuse_iext_next(ir, &icur);
> + while (fuse_iext_get_extent(ir, &icur, &got)) {
> + if (got.length == 0 ||
> + got.offset < prev.offset + prev.length ||
> + fuse_iomap_can_merge(&prev, &got)) {
> + printk(KERN_ERR "FUSE IOMAP CORRUPTION ino=%llu nr=%llu",
> + fi->orig_ino, nr);
> + printk(KERN_ERR "prev: offset=%llu length=%llu type=%u flags=0x%x dev=%u addr=%llu\n",
> + prev.offset, prev.length, prev.type, prev.flags,
> + prev.dev, prev.addr);
> + printk(KERN_ERR "curr: offset=%llu length=%llu type=%u flags=0x%x dev=%u addr=%llu\n",
> + got.offset, got.length, got.type, got.flags,
> + got.dev, got.addr);
> + }
> +
> + prev = got;
> + nr++;
> + fuse_iext_next(ir, &icur);
> + }
> +}
> +#else
> +# define fuse_iext_check_mappings(...) ((void)0)
> +#endif
> +
> +static void
> +fuse_iext_del_mapping(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *icur,
> + struct fuse_iomap_io *got, /* current extent entry */
> + struct fuse_iomap_io *del) /* data to remove from extents */
> +{
> + struct fuse_iomap_io new; /* new record to be inserted */
> + /* first addr (fsblock aligned) past del */
> + fuse_iext_key_t del_endaddr;
> + /* first offset (fsblock aligned) past del */
> + fuse_iext_key_t del_endoff = del->offset + del->length;
> + /* first offset (fsblock aligned) past got */
> + fuse_iext_key_t got_endoff = got->offset + got->length;
> + uint32_t state = fuse_iomap_fork_to_state(ic, ir);
> +
> + ASSERT(del->length > 0);
> + ASSERT(got->offset <= del->offset);
> + ASSERT(got_endoff >= del_endoff);
> +
> + switch (del->type) {
> + case FUSE_IOMAP_TYPE_MAPPED:
> + case FUSE_IOMAP_TYPE_UNWRITTEN:
> + del_endaddr = del->addr + del->length;
> + break;
> + default:
> + del_endaddr = FUSE_IOMAP_NULL_ADDR;
> + break;
> + }
> +
> + if (got->offset == del->offset)
> + state |= FUSE_IEXT_LEFT_FILLING;
> + if (got_endoff == del_endoff)
> + state |= FUSE_IEXT_RIGHT_FILLING;
> +
> + switch (state & (FUSE_IEXT_LEFT_FILLING | FUSE_IEXT_RIGHT_FILLING)) {
> + case FUSE_IEXT_LEFT_FILLING | FUSE_IEXT_RIGHT_FILLING:
> + /*
> + * Matches the whole extent. Delete the entry.
> + */
> + fuse_iext_remove(ic, icur, state);
> + fuse_iext_prev(ir, icur);
> + break;
> + case FUSE_IEXT_LEFT_FILLING:
> + /*
> + * Deleting the first part of the extent.
> + */
> + got->offset = del_endoff;
> + got->addr = del_endaddr;
> + got->length -= del->length;
> + fuse_iext_update_extent(ic, state, icur, got);
> + break;
> + case FUSE_IEXT_RIGHT_FILLING:
> + /*
> + * Deleting the last part of the extent.
> + */
> + got->length -= del->length;
> + fuse_iext_update_extent(ic, state, icur, got);
> + break;
> + case 0:
> + /*
> + * Deleting the middle of the extent.
> + */
> + got->length = del->offset - got->offset;
> + fuse_iext_update_extent(ic, state, icur, got);
> +
> + new.offset = del_endoff;
> + new.length = got_endoff - del_endoff;
> + new.type = got->type;
> + new.flags = got->flags;
> + new.addr = del_endaddr;
> + new.dev = got->dev;
> +
> + fuse_iext_next(ir, icur);
> + fuse_iext_insert(ic, icur, &new, state);
> + break;
> + }
> +}
> +
> +int
> +fuse_iomap_cache_remove(
> + struct inode *inode,
> + enum fuse_iomap_iodir iodir,
> + loff_t start, /* first file offset deleted */
> + uint64_t len) /* length to unmap */
> +{
> + struct fuse_iext_cursor icur;
> + struct fuse_iomap_io got; /* current extent record */
> + struct fuse_iomap_io del; /* extent being deleted */
> + loff_t end;
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> + struct fuse_iext_root *ir = fuse_iext_root_ptr(ic, iodir);
> + bool wasreal;
> + bool done = false;
> + int ret = 0;
> +
> + assert_cache_locked(ic);
> +
> + /* Fork is not active or has zero mappings */
> + if (ir->ir_bytes < 0 || fuse_iext_count(ir) == 0)
> + return 0;
> +
> + /* Fast shortcut if the caller wants to erase everything */
> + if (start == 0 && len >= inode->i_sb->s_maxbytes) {
> + fuse_iext_destroy(ir);
> + return 0;
> + }
> +
> + if (!len)
> + goto out;
> +
> + /*
> + * If the caller wants us to remove everything to EOF, we set the end
> + * of the removal range to the maximum file offset. We don't support
> + * unsigned file offsets.
> + */
> + if (len == FUSE_IOMAP_INVAL_TO_EOF) {
> + const unsigned int blocksize = i_blocksize(&fi->inode);
> +
> + len = round_up(inode->i_sb->s_maxbytes, blocksize) - start;
> + }
> +
> + /*
> + * Now that we've settled len, look up the extent before the end of the
> + * range.
> + */
> + end = start + len;
> + if (!fuse_iext_lookup_extent_before(ic, ir, &end, &icur, &got))
> + goto out;
> + end--;
> +
> + while (end != -1 && end >= start) {
> + /*
> + * Is the found extent after a hole in which end lives?
> + * Just back up to the previous extent, if so.
> + */
> + if (got.offset > end &&
> + !fuse_iext_prev_extent(ir, &icur, &got)) {
> + done = true;
> + break;
> + }
> + /*
> + * Is the last block of this extent before the range
> + * we're supposed to delete? If so, we're done.
> + */
> + end = min_t(loff_t, end, got.offset + got.length - 1);
> + if (end < start)
> + break;
> + /*
> + * Then deal with the (possibly delayed) allocated space
> + * we found.
> + */
> + del = got;
> + switch (del.type) {
> + case FUSE_IOMAP_TYPE_DELALLOC:
> + case FUSE_IOMAP_TYPE_HOLE:
> + case FUSE_IOMAP_TYPE_INLINE:
> + case FUSE_IOMAP_TYPE_PURE_OVERWRITE:
> + wasreal = false;
> + break;
> + case FUSE_IOMAP_TYPE_MAPPED:
> + case FUSE_IOMAP_TYPE_UNWRITTEN:
> + wasreal = true;
> + break;
> + default:
> + ASSERT(0);
> + ret = -EFSCORRUPTED;
> + goto out;
> + }
> +
> + if (got.offset < start) {
> + del.offset = start;
> + del.length -= start - got.offset;
> + if (wasreal)
> + del.addr += start - got.offset;
> + }
> + if (del.offset + del.length > end + 1)
> + del.length = end + 1 - del.offset;
> +
> + fuse_iext_del_mapping(ic, ir, &icur, &got, &del);
> + end = del.offset - 1;
> +
> + /*
> + * If not done go on to the next (previous) record.
> + */
> + if (end != -1 && end >= start) {
> + if (!fuse_iext_get_extent(ir, &icur, &got) ||
> + (got.offset > end &&
> + !fuse_iext_prev_extent(ir, &icur, &got))) {
> + done = true;
> + break;
> + }
> + }
> + }
> +
> + /* Should have removed everything */
> + if (len == 0 || done || end == (loff_t)-1 || end < start)
> + ret = 0;
> + else
> + ret = -EFSCORRUPTED;
> +
> +out:
> + fuse_iext_check_mappings(ic, ir);
> + return ret;
> +}
> +
> +static void
> +fuse_iext_add_mapping(
> + struct fuse_iomap_cache *ic,
> + struct fuse_iext_root *ir,
> + struct fuse_iext_cursor *icur,
> + const struct fuse_iomap_io *new) /* new extent entry */
> +{
> + struct fuse_iomap_io left; /* left neighbor extent entry */
> + struct fuse_iomap_io right; /* right neighbor extent entry */
> + uint32_t state = fuse_iomap_fork_to_state(ic, ir);
> +
> + /*
> + * Check and set flags if this segment has a left neighbor.
> + */
> + if (fuse_iext_peek_prev_extent(ir, icur, &left))
> + state |= FUSE_IEXT_LEFT_VALID;
> +
> + /*
> + * Check and set flags if this segment has a current value.
> + * Not true if we're inserting into the "hole" at eof.
> + */
> + if (fuse_iext_get_extent(ir, icur, &right))
> + state |= FUSE_IEXT_RIGHT_VALID;
> +
> + /*
> + * We're inserting a real allocation between "left" and "right".
> + * Set the contiguity flags. Don't let extents get too large.
> + */
> + if ((state & FUSE_IEXT_LEFT_VALID) && fuse_iomap_can_merge(&left, new))
> + state |= FUSE_IEXT_LEFT_CONTIG;
> +
> + if ((state & FUSE_IEXT_RIGHT_VALID) &&
> + fuse_iomap_can_merge(new, &right) &&
> + (!(state & FUSE_IEXT_LEFT_CONTIG) ||
> + fuse_iomap_can_merge3(&left, new, &right)))
> + state |= FUSE_IEXT_RIGHT_CONTIG;
> +
> + /*
> + * Select which case we're in here, and implement it.
> + */
> + switch (state & (FUSE_IEXT_LEFT_CONTIG | FUSE_IEXT_RIGHT_CONTIG)) {
> + case FUSE_IEXT_LEFT_CONTIG | FUSE_IEXT_RIGHT_CONTIG:
> + /*
> + * New allocation is contiguous with real allocations on the
> + * left and on the right.
> + * Merge all three into a single extent record.
> + */
> + left.length += new->length + right.length;
> +
> + fuse_iext_remove(ic, icur, state);
> + fuse_iext_prev(ir, icur);
> + fuse_iext_update_extent(ic, state, icur, &left);
> + break;
> +
> + case FUSE_IEXT_LEFT_CONTIG:
> + /*
> + * New allocation is contiguous with a real allocation
> + * on the left.
> + * Merge the new allocation with the left neighbor.
> + */
> + left.length += new->length;
> +
> + fuse_iext_prev(ir, icur);
> + fuse_iext_update_extent(ic, state, icur, &left);
> + break;
> +
> + case FUSE_IEXT_RIGHT_CONTIG:
> + /*
> + * New allocation is contiguous with a real allocation
> + * on the right.
> + * Merge the new allocation with the right neighbor.
> + */
> + right.offset = new->offset;
> + right.addr = new->addr;
> + right.length += new->length;
> + fuse_iext_update_extent(ic, state, icur, &right);
> + break;
> +
> + case 0:
> + /*
> + * New allocation is not contiguous with another
> + * real allocation.
> + * Insert a new entry.
> + */
> + fuse_iext_insert(ic, icur, new, state);
> + break;
> + }
> +}
> +
> +static int
> +fuse_iomap_cache_add(
> + struct inode *inode,
> + enum fuse_iomap_iodir iodir,
> + const struct fuse_iomap_io *new)
> +{
> + struct fuse_iext_cursor icur;
> + struct fuse_iomap_io got;
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> + struct fuse_iext_root *ir = fuse_iext_root_ptr(ic, iodir);
> +
> + assert_cache_locked(ic);
> + ASSERT(new->length > 0);
> + ASSERT(new->offset < inode->i_sb->s_maxbytes);
> +
> + /* Mark this fork as being in use */
> + if (ir->ir_bytes < 0)
> + ir->ir_bytes = 0;
> +
> + if (fuse_iext_lookup_extent(ic, ir, new->offset, &icur, &got)) {
> + /* make sure we only add into a hole. */
> + ASSERT(got.offset > new->offset);
> + ASSERT(got.offset - new->offset >= new->length);
> +
> + if (got.offset <= new->offset ||
> + got.offset - new->offset < new->length)
> + return -EFSCORRUPTED;
> + }
> +
> + fuse_iext_add_mapping(ic, ir, &icur, new);
> + fuse_iext_check_mappings(ic, ir);
> + return 0;
> +}
> +
> +int fuse_iomap_cache_alloc(struct inode *inode)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *old = NULL;
> + struct fuse_iomap_cache *ic;
> +
> + ic = kzalloc_obj(struct fuse_iomap_cache);
> + if (!ic)
> + return -ENOMEM;
> +
> + /* Only the write mapping cache can return NOFORK */
> + ic->ic_write.ir_bytes = -1;
> + ic->ic_inode = inode;
> + init_rwsem(&ic->ic_lock);
> +
> + if (!try_cmpxchg(&fi->cache, &old, ic)) {
> + /* Someone created mapping cache before us? Free ours... */
> + kfree(ic);
> + }
> +
> + return 0;
> +}
> +
> +static void fuse_iomap_cache_purge(struct fuse_iomap_cache *ic)
> +{
> + fuse_iext_destroy(&ic->ic_read);
> + fuse_iext_destroy(&ic->ic_write);
> +}
> +
> +void fuse_iomap_cache_free(struct inode *inode)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> +
> + /*
> + * This is only called from eviction, so we cannot be racing to set or
> + * clear the pointer.
> + */
> + fi->cache = NULL;
> +
> + fuse_iomap_cache_purge(ic);
> + kfree(ic);
> +}
> +
> +int
> +fuse_iomap_cache_upsert(
> + struct inode *inode,
> + enum fuse_iomap_iodir iodir,
> + const struct fuse_iomap_io *map)
> +{
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> + int err;
> +
> + ASSERT(fuse_inode_caches_iomaps(inode));
> +
> + /*
> + * We interpret no write fork to mean that all writes are pure
> + * overwrites. Avoid wasting memory if we're trying to upsert a
> + * pure overwrite.
> + */
> + if (iodir == WRITE_MAPPING &&
> + map->type == FUSE_IOMAP_TYPE_PURE_OVERWRITE &&
> + ic->ic_write.ir_bytes < 0)
> + return 0;
> +
> + err = fuse_iomap_cache_remove(inode, iodir, map->offset, map->length);
> + if (err)
> + return err;
> +
> + return fuse_iomap_cache_add(inode, iodir, map);
> +}
> +
> +/*
> + * Trim the returned map to the required bounds
> + */
> +static void
> +fuse_iomap_trim(
> + struct fuse_inode *fi,
> + struct fuse_iomap_lookup *mval,
> + const struct fuse_iomap_io *got,
> + loff_t off,
> + loff_t len)
> +{
> + struct fuse_iomap_cache *ic = fi->cache;
> + const unsigned int blocksize = i_blocksize(&fi->inode);
> + const loff_t aligned_off = round_down(off, blocksize);
> + const loff_t aligned_end = round_up(off + len, blocksize);
> + const loff_t aligned_len = aligned_end - aligned_off;
This realignment logic is no longer necessary because we now require
that all cached mappings are aligned to the blocksize...
> +
> + ASSERT(aligned_off >= got->offset);
> +
> + switch (got->type) {
> + case FUSE_IOMAP_TYPE_MAPPED:
> + case FUSE_IOMAP_TYPE_UNWRITTEN:
> + mval->map.addr = got->addr + (aligned_off - got->offset);
> + break;
> + default:
> + mval->map.addr = FUSE_IOMAP_NULL_ADDR;
> + break;
> + }
> + mval->map.offset = aligned_off;
> + mval->map.length = min_t(loff_t, aligned_len,
> + got->length - (aligned_off - got->offset));
...and this length computation has a bug where we can accidentally
extend mappings if off+len is far beyond the end of @got.  In the end we
can more or less copy @got to mval->map, set the addr appropriately for
the iomap type, and sample the validity cookie.
--D
> + mval->map.type = got->type;
> + mval->map.flags = got->flags;
> + mval->map.dev = got->dev;
> + mval->validity_cookie = fuse_iext_read_seq(ic);
> +}
> +
> +enum fuse_iomap_lookup_result
> +fuse_iomap_cache_lookup(
> + struct inode *inode,
> + enum fuse_iomap_iodir iodir,
> + loff_t off,
> + uint64_t len,
> + struct fuse_iomap_lookup *mval)
> +{
> + struct fuse_iomap_io got;
> + struct fuse_iext_cursor icur;
> + struct fuse_inode *fi = get_fuse_inode(inode);
> + struct fuse_iomap_cache *ic = fi->cache;
> + struct fuse_iext_root *ir = fuse_iext_root_ptr(ic, iodir);
> +
> + assert_cache_locked_shared(ic);
> +
> + if (ir->ir_bytes < 0) {
> + /*
> + * No write fork at all means this filesystem doesn't do out of
> + * place writes.
> + */
> + return LOOKUP_NOFORK;
> + }
> +
> + if (!fuse_iext_lookup_extent(ic, ir, off, &icur, &got)) {
> + /*
> + * Does not contain a mapping at or beyond off, which is a
> + * cache miss.
> + */
> + return LOOKUP_MISS;
> + }
> +
> + if (got.offset > off) {
> + /*
> + * Found a mapping, but it doesn't cover the start of the
> + * range, which is effectively a miss.
> + */
> + return LOOKUP_MISS;
> + }
> +
> + /* Found a mapping in the cache, return it */
> + fuse_iomap_trim(fi, mval, &got, off, len);
> + return LOOKUP_HIT;
> +}
> diff --git a/fs/fuse/trace.c b/fs/fuse/trace.c
> index 71d444ac1e5021..69310d6f773ffa 100644
> --- a/fs/fuse/trace.c
> +++ b/fs/fuse/trace.c
> @@ -8,6 +8,7 @@
> #include "fuse_dev_i.h"
> #include "fuse_iomap.h"
> #include "fuse_iomap_i.h"
> +#include "fuse_iomap_cache.h"
>
> #include <linux/pagemap.h>
> #include <linux/iomap.h>
>
>
next prev parent reply other threads:[~2026-02-27 18:07 UTC|newest]
Thread overview: 230+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-23 22:46 [PATCHBLIZZARD v7] fuse/libfuse/e2fsprogs: containerize ext4 for safer operation Darrick J. Wong
2026-02-23 23:00 ` [PATCHSET v7 1/9] fuse: general bug fixes Darrick J. Wong
2026-02-23 23:06 ` [PATCH 1/5] fuse: flush pending FUSE_RELEASE requests before sending FUSE_DESTROY Darrick J. Wong
2026-02-24 19:33 ` Joanne Koong
2026-02-24 19:57 ` Darrick J. Wong
2026-02-24 20:03 ` Joanne Koong
2026-02-23 23:06 ` [PATCH 2/5] fuse: quiet down complaints in fuse_conn_limit_write Darrick J. Wong
2026-02-24 8:36 ` Horst Birthelmer
2026-02-24 19:17 ` Darrick J. Wong
2026-02-24 20:09 ` Joanne Koong
2026-02-27 16:05 ` Miklos Szeredi
2026-02-23 23:07 ` [PATCH 3/5] fuse: implement file attributes mask for statx Darrick J. Wong
2026-03-25 18:35 ` Joanne Koong
2026-03-25 22:12 ` Darrick J. Wong
2026-02-23 23:07 ` [PATCH 4/5] fuse: update file mode when updating acls Darrick J. Wong
2026-03-25 19:39 ` Joanne Koong
2026-03-25 22:23 ` Darrick J. Wong
2026-02-23 23:07 ` [PATCH 5/5] fuse: propagate default and file acls on creation Darrick J. Wong
2026-02-23 23:00 ` [PATCHSET v7 2/9] iomap: cleanups ahead of adding fuse support Darrick J. Wong
2026-02-23 23:07 ` [PATCH 1/2] iomap: allow directio callers to supply _COMP_WORK Darrick J. Wong
2026-02-24 14:00 ` Christoph Hellwig
2026-02-24 19:17 ` Darrick J. Wong
2026-02-23 23:08 ` [PATCH 2/2] iomap: allow NULL swap info bdev when activating swapfile Darrick J. Wong
2026-02-24 14:01 ` Christoph Hellwig
2026-02-24 19:26 ` Darrick J. Wong
2026-02-25 14:16 ` Christoph Hellwig
2026-02-25 17:03 ` Darrick J. Wong
2026-02-25 17:49 ` Christoph Hellwig
2026-02-23 23:01 ` [PATCHSET v7 3/9] fuse: cleanups ahead of adding fuse support Darrick J. Wong
2026-02-23 23:08 ` [PATCH 1/2] fuse: move the passthrough-specific code back to passthrough.c Darrick J. Wong
2026-02-23 23:08 ` [PATCH 2/2] fuse_trace: " Darrick J. Wong
2026-02-23 23:01 ` [PATCHSET v7 4/9] fuse: allow servers to use iomap for better file IO performance Darrick J. Wong
2026-02-23 23:08 ` [PATCH 01/33] fuse: implement the basic iomap mechanisms Darrick J. Wong
2026-02-23 23:09 ` [PATCH 02/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:09 ` [PATCH 03/33] fuse: make debugging configurable at runtime Darrick J. Wong
2026-02-23 23:09 ` [PATCH 04/33] fuse: adapt FUSE_DEV_IOC_BACKING_{OPEN,CLOSE} to add new iomap devices Darrick J. Wong
2026-02-23 23:09 ` [PATCH 05/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:10 ` [PATCH 06/33] fuse: enable SYNCFS and ensure we flush everything before sending DESTROY Darrick J. Wong
2026-02-23 23:10 ` [PATCH 07/33] fuse: clean up per-file type inode initialization Darrick J. Wong
2026-02-23 23:10 ` [PATCH 08/33] fuse: create a per-inode flag for setting exclusive mode Darrick J. Wong
2026-02-23 23:11 ` [PATCH 09/33] fuse: create a per-inode flag for toggling iomap Darrick J. Wong
2026-02-23 23:11 ` [PATCH 10/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:11 ` [PATCH 11/33] fuse: isolate the other regular file IO paths from iomap Darrick J. Wong
2026-02-23 23:11 ` [PATCH 12/33] fuse: implement basic iomap reporting such as FIEMAP and SEEK_{DATA,HOLE} Darrick J. Wong
2026-02-23 23:12 ` [PATCH 13/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:12 ` [PATCH 14/33] fuse: implement direct IO with iomap Darrick J. Wong
2026-02-23 23:12 ` [PATCH 15/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:12 ` [PATCH 16/33] fuse: implement buffered " Darrick J. Wong
2026-02-27 18:04 ` Darrick J. Wong
2026-02-23 23:13 ` [PATCH 17/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:13 ` [PATCH 18/33] fuse: use an unrestricted backing device with iomap pagecache io Darrick J. Wong
2026-02-23 23:13 ` [PATCH 19/33] fuse: implement large folios for iomap pagecache files Darrick J. Wong
2026-02-23 23:13 ` [PATCH 20/33] fuse: advertise support for iomap Darrick J. Wong
2026-02-23 23:14 ` [PATCH 21/33] fuse: query filesystem geometry when using iomap Darrick J. Wong
2026-02-23 23:14 ` [PATCH 22/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:14 ` [PATCH 23/33] fuse: implement fadvise for iomap files Darrick J. Wong
2026-02-23 23:14 ` [PATCH 24/33] fuse: invalidate ranges of block devices being used for iomap Darrick J. Wong
2026-02-23 23:15 ` [PATCH 25/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:15 ` [PATCH 26/33] fuse: implement inline data file IO via iomap Darrick J. Wong
2026-02-27 18:02 ` Darrick J. Wong
2026-02-23 23:15 ` [PATCH 27/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:15 ` [PATCH 28/33] fuse: allow more statx fields Darrick J. Wong
2026-02-23 23:16 ` [PATCH 29/33] fuse: support atomic writes with iomap Darrick J. Wong
2026-02-24 12:58 ` Pankaj Raghav (Samsung)
2026-02-24 19:30 ` Darrick J. Wong
2026-02-24 21:18 ` Pankaj Raghav (Samsung)
2026-02-23 23:16 ` [PATCH 30/33] fuse_trace: " Darrick J. Wong
2026-02-23 23:16 ` [PATCH 31/33] fuse: disable direct fs reclaim for any fuse server that uses iomap Darrick J. Wong
2026-02-23 23:17 ` [PATCH 32/33] fuse: enable swapfile activation on iomap Darrick J. Wong
2026-02-23 23:17 ` [PATCH 33/33] fuse: implement freeze and shutdowns for iomap filesystems Darrick J. Wong
2026-02-23 23:01 ` [PATCHSET v7 5/9] fuse: allow servers to specify root node id Darrick J. Wong
2026-02-23 23:17 ` [PATCH 1/3] fuse: make the root nodeid dynamic Darrick J. Wong
2026-02-23 23:17 ` [PATCH 2/3] fuse_trace: " Darrick J. Wong
2026-02-23 23:18 ` [PATCH 3/3] fuse: allow setting of root nodeid Darrick J. Wong
2026-02-23 23:01 ` [PATCHSET v7 6/9] fuse: handle timestamps and ACLs correctly when iomap is enabled Darrick J. Wong
2026-02-23 23:18 ` [PATCH 1/9] fuse: enable caching of timestamps Darrick J. Wong
2026-02-23 23:18 ` [PATCH 2/9] fuse: force a ctime update after a fileattr_set call when in iomap mode Darrick J. Wong
2026-02-23 23:18 ` [PATCH 3/9] fuse: allow local filesystems to set some VFS iflags Darrick J. Wong
2026-02-23 23:19 ` [PATCH 4/9] fuse_trace: " Darrick J. Wong
2026-02-23 23:19 ` [PATCH 5/9] fuse: cache atime when in iomap mode Darrick J. Wong
2026-02-23 23:19 ` [PATCH 6/9] fuse: let the kernel handle KILL_SUID/KILL_SGID for iomap filesystems Darrick J. Wong
2026-02-23 23:19 ` [PATCH 7/9] fuse_trace: " Darrick J. Wong
2026-02-23 23:20 ` [PATCH 8/9] fuse: update ctime when updating acls on an iomap inode Darrick J. Wong
2026-02-23 23:20 ` [PATCH 9/9] fuse: always cache ACLs when using iomap Darrick J. Wong
2026-02-23 23:02 ` [PATCHSET v7 7/9] fuse: cache iomap mappings for even better file IO performance Darrick J. Wong
2026-02-23 23:20 ` [PATCH 01/12] fuse: cache iomaps Darrick J. Wong
2026-02-27 18:07 ` Darrick J. Wong [this message]
2026-02-23 23:20 ` [PATCH 02/12] fuse_trace: " Darrick J. Wong
2026-02-23 23:21 ` [PATCH 03/12] fuse: use the iomap cache for iomap_begin Darrick J. Wong
2026-02-23 23:21 ` [PATCH 04/12] fuse_trace: " Darrick J. Wong
2026-02-23 23:21 ` [PATCH 05/12] fuse: invalidate iomap cache after file updates Darrick J. Wong
2026-02-23 23:21 ` [PATCH 06/12] fuse_trace: " Darrick J. Wong
2026-02-23 23:22 ` [PATCH 07/12] fuse: enable iomap cache management Darrick J. Wong
2026-02-23 23:22 ` [PATCH 08/12] fuse_trace: " Darrick J. Wong
2026-02-23 23:22 ` [PATCH 09/12] fuse: overlay iomap inode info in struct fuse_inode Darrick J. Wong
2026-02-23 23:23 ` [PATCH 10/12] fuse: constrain iomap mapping cache size Darrick J. Wong
2026-02-23 23:23 ` [PATCH 11/12] fuse_trace: " Darrick J. Wong
2026-02-23 23:23 ` [PATCH 12/12] fuse: enable iomap Darrick J. Wong
2026-02-23 23:02 ` [PATCHSET v7 8/9] fuse: run fuse servers as a contained service Darrick J. Wong
2026-02-23 23:23 ` [PATCH 1/2] fuse: allow privileged mount helpers to pre-approve iomap usage Darrick J. Wong
2026-02-23 23:24 ` [PATCH 2/2] fuse: set iomap backing device block size Darrick J. Wong
2026-02-23 23:02 ` [PATCHSET RFC 9/9] fuse: allow fuse servers to upload iomap BPF programs Darrick J. Wong
2026-02-23 23:24 ` [PATCH 1/5] fuse: enable fuse servers to upload BPF programs to handle iomap requests Darrick J. Wong
2026-02-23 23:24 ` [PATCH 2/5] fuse_trace: " Darrick J. Wong
2026-02-23 23:24 ` [PATCH 3/5] fuse: prevent iomap bpf programs from writing to most of the system Darrick J. Wong
2026-02-23 23:25 ` [PATCH 4/5] fuse: add kfuncs for iomap bpf programs to manage the cache Darrick J. Wong
2026-02-23 23:25 ` [PATCH 5/5] fuse: make fuse_inode opaque to iomap bpf programs Darrick J. Wong
2026-02-23 23:02 ` [PATCHSET v7 1/6] libfuse: allow servers to use iomap for better file IO performance Darrick J. Wong
2026-02-23 23:25 ` [PATCH 01/25] libfuse: bump kernel and library ABI versions Darrick J. Wong
2026-02-23 23:25 ` [PATCH 02/25] libfuse: wait in do_destroy until all open files are closed Darrick J. Wong
2026-02-23 23:26 ` [PATCH 03/25] libfuse: add kernel gates for FUSE_IOMAP Darrick J. Wong
2026-02-23 23:26 ` [PATCH 04/25] libfuse: add fuse commands for iomap_begin and end Darrick J. Wong
2026-02-23 23:26 ` [PATCH 05/25] libfuse: add upper level iomap commands Darrick J. Wong
2026-02-23 23:26 ` [PATCH 06/25] libfuse: add a lowlevel notification to add a new device to iomap Darrick J. Wong
2026-02-23 23:27 ` [PATCH 07/25] libfuse: add upper-level iomap add device function Darrick J. Wong
2026-02-23 23:27 ` [PATCH 08/25] libfuse: add iomap ioend low level handler Darrick J. Wong
2026-02-23 23:27 ` [PATCH 09/25] libfuse: add upper level iomap ioend commands Darrick J. Wong
2026-02-23 23:27 ` [PATCH 10/25] libfuse: add a reply function to send FUSE_ATTR_* to the kernel Darrick J. Wong
2026-02-23 23:28 ` [PATCH 11/25] libfuse: connect high level fuse library to fuse_reply_attr_iflags Darrick J. Wong
2026-02-23 23:28 ` [PATCH 12/25] libfuse: support enabling exclusive mode for files Darrick J. Wong
2026-02-23 23:28 ` [PATCH 13/25] libfuse: support direct I/O through iomap Darrick J. Wong
2026-02-23 23:29 ` [PATCH 14/25] libfuse: don't allow hardlinking of iomap files in the upper level fuse library Darrick J. Wong
2026-02-23 23:29 ` [PATCH 15/25] libfuse: allow discovery of the kernel's iomap capabilities Darrick J. Wong
2026-02-23 23:29 ` [PATCH 16/25] libfuse: add lower level iomap_config implementation Darrick J. Wong
2026-02-23 23:29 ` [PATCH 17/25] libfuse: add upper " Darrick J. Wong
2026-02-23 23:30 ` [PATCH 18/25] libfuse: add low level code to invalidate iomap block device ranges Darrick J. Wong
2026-02-23 23:30 ` [PATCH 19/25] libfuse: add upper-level API to invalidate parts of an iomap block device Darrick J. Wong
2026-02-23 23:30 ` [PATCH 20/25] libfuse: add atomic write support Darrick J. Wong
2026-02-23 23:30 ` [PATCH 21/25] libfuse: allow disabling of fs memory reclaim and write throttling Darrick J. Wong
2026-02-23 23:31 ` [PATCH 22/25] libfuse: create a helper to transform an open regular file into an open loopdev Darrick J. Wong
2026-02-23 23:31 ` [PATCH 23/25] libfuse: add swapfile support for iomap files Darrick J. Wong
2026-02-23 23:31 ` [PATCH 24/25] libfuse: add lower-level filesystem freeze, thaw, and shutdown requests Darrick J. Wong
2026-02-23 23:31 ` [PATCH 25/25] libfuse: add upper-level filesystem freeze, thaw, and shutdown events Darrick J. Wong
2026-02-23 23:03 ` [PATCHSET v7 2/6] libfuse: allow servers to specify root node id Darrick J. Wong
2026-02-23 23:32 ` [PATCH 1/1] libfuse: allow root_nodeid mount option Darrick J. Wong
2026-02-23 23:03 ` [PATCHSET v7 3/6] libfuse: implement syncfs Darrick J. Wong
2026-02-23 23:32 ` [PATCH 1/2] libfuse: add strictatime/lazytime mount options Darrick J. Wong
2026-02-23 23:32 ` [PATCH 2/2] libfuse: set sync, immutable, and append when loading files Darrick J. Wong
2026-02-23 23:03 ` [PATCHSET v7 4/6] libfuse: cache iomap mappings for even better file IO performance Darrick J. Wong
2026-02-23 23:32 ` [PATCH 1/5] libfuse: enable iomap cache management for lowlevel fuse Darrick J. Wong
2026-02-23 23:33 ` [PATCH 2/5] libfuse: add upper-level iomap cache management Darrick J. Wong
2026-02-23 23:33 ` [PATCH 3/5] libfuse: allow constraining of iomap mapping cache size Darrick J. Wong
2026-02-23 23:33 ` [PATCH 4/5] libfuse: add upper-level iomap mapping cache constraint code Darrick J. Wong
2026-02-23 23:33 ` [PATCH 5/5] libfuse: enable iomap Darrick J. Wong
2026-02-23 23:03 ` [PATCHSET v7 5/6] libfuse: run fuse servers as a contained service Darrick J. Wong
2026-02-23 23:34 ` [PATCH 1/5] libfuse: add systemd/inetd socket service mounting helper Darrick J. Wong
2026-02-23 23:34 ` [PATCH 2/5] libfuse: integrate fuse services into mount.fuse3 Darrick J. Wong
2026-02-23 23:34 ` [PATCH 3/5] libfuse: delegate iomap privilege from mount.service to fuse services Darrick J. Wong
2026-02-23 23:34 ` [PATCH 4/5] libfuse: enable setting iomap block device block size Darrick J. Wong
2026-02-23 23:35 ` [PATCH 5/5] fuservicemount: create loop devices for regular files Darrick J. Wong
2026-02-23 23:04 ` [PATCHSET RFC 6/6] fuse: allow fuse servers to upload iomap BPF programs Darrick J. Wong
2026-02-23 23:35 ` [PATCH 1/3] libfuse: allow fuse servers to upload bpf code for iomap functions Darrick J. Wong
2026-02-23 23:35 ` [PATCH 2/3] libfuse: add kfuncs for iomap bpf programs to manage the cache Darrick J. Wong
2026-02-23 23:36 ` [PATCH 3/3] libfuse: make fuse_inode opaque to iomap bpf programs Darrick J. Wong
2026-02-23 23:04 ` [PATCHSET v7 1/8] fuse2fs: use fuse iomap data paths for better file I/O performance Darrick J. Wong
2026-02-23 23:36 ` [PATCH 01/19] fuse2fs: implement bare minimum iomap for file mapping reporting Darrick J. Wong
2026-02-23 23:36 ` [PATCH 02/19] fuse2fs: add iomap= mount option Darrick J. Wong
2026-02-23 23:36 ` [PATCH 03/19] fuse2fs: implement iomap configuration Darrick J. Wong
2026-02-23 23:37 ` [PATCH 04/19] fuse2fs: register block devices for use with iomap Darrick J. Wong
2026-02-23 23:37 ` [PATCH 05/19] fuse2fs: implement directio file reads Darrick J. Wong
2026-02-23 23:37 ` [PATCH 06/19] fuse2fs: add extent dump function for debugging Darrick J. Wong
2026-02-23 23:37 ` [PATCH 07/19] fuse2fs: implement direct write support Darrick J. Wong
2026-02-23 23:38 ` [PATCH 08/19] fuse2fs: turn on iomap for pagecache IO Darrick J. Wong
2026-02-23 23:38 ` [PATCH 09/19] fuse2fs: don't zero bytes in punch hole Darrick J. Wong
2026-02-23 23:38 ` [PATCH 10/19] fuse2fs: don't do file data block IO when iomap is enabled Darrick J. Wong
2026-02-23 23:38 ` [PATCH 11/19] fuse2fs: try to create loop device when ext4 device is a regular file Darrick J. Wong
2026-02-23 23:39 ` [PATCH 12/19] fuse2fs: enable file IO to inline data files Darrick J. Wong
2026-02-23 23:39 ` [PATCH 13/19] fuse2fs: set iomap-related inode flags Darrick J. Wong
2026-02-23 23:39 ` [PATCH 14/19] fuse2fs: configure block device block size Darrick J. Wong
2026-02-23 23:39 ` [PATCH 15/19] fuse4fs: separate invalidation Darrick J. Wong
2026-02-23 23:40 ` [PATCH 16/19] fuse2fs: implement statx Darrick J. Wong
2026-02-23 23:40 ` [PATCH 17/19] fuse2fs: enable atomic writes Darrick J. Wong
2026-02-23 23:40 ` [PATCH 18/19] fuse4fs: disable fs reclaim and write throttling Darrick J. Wong
2026-02-23 23:41 ` [PATCH 19/19] fuse2fs: implement freeze and shutdown requests Darrick J. Wong
2026-02-23 23:04 ` [PATCHSET v7 2/8] fuse4fs: specify the root node id Darrick J. Wong
2026-02-23 23:41 ` [PATCH 1/1] fuse4fs: don't use inode number translation when possible Darrick J. Wong
2026-02-23 23:05 ` [PATCHSET v7 3/8] fuse2fs: handle timestamps and ACLs correctly when iomap is enabled Darrick J. Wong
2026-02-23 23:41 ` [PATCH 01/10] fuse2fs: add strictatime/lazytime mount options Darrick J. Wong
2026-02-23 23:41 ` [PATCH 02/10] fuse2fs: skip permission checking on utimens when iomap is enabled Darrick J. Wong
2026-02-23 23:42 ` [PATCH 03/10] fuse2fs: let the kernel tell us about acl/mode updates Darrick J. Wong
2026-02-23 23:42 ` [PATCH 04/10] fuse2fs: better debugging for file mode updates Darrick J. Wong
2026-02-23 23:42 ` [PATCH 05/10] fuse2fs: debug timestamp updates Darrick J. Wong
2026-02-23 23:42 ` [PATCH 06/10] fuse2fs: use coarse timestamps for iomap mode Darrick J. Wong
2026-02-23 23:43 ` [PATCH 07/10] fuse2fs: add tracing for retrieving timestamps Darrick J. Wong
2026-02-23 23:43 ` [PATCH 08/10] fuse2fs: enable syncfs Darrick J. Wong
2026-02-23 23:43 ` [PATCH 09/10] fuse2fs: set sync, immutable, and append at file load time Darrick J. Wong
2026-02-23 23:43 ` [PATCH 10/10] fuse4fs: increase attribute timeout in iomap mode Darrick J. Wong
2026-02-23 23:05 ` [PATCHSET v7 4/8] fuse2fs: cache iomap mappings for even better file IO performance Darrick J. Wong
2026-02-23 23:44 ` [PATCH 1/3] fuse2fs: enable caching of iomaps Darrick J. Wong
2026-02-23 23:44 ` [PATCH 2/3] fuse2fs: constrain iomap mapping cache size Darrick J. Wong
2026-02-23 23:44 ` [PATCH 3/3] fuse2fs: enable iomap Darrick J. Wong
2026-02-23 23:05 ` [PATCHSET v7 5/8] fuse2fs: improve block and inode caching Darrick J. Wong
2026-02-23 23:44 ` [PATCH 1/6] libsupport: add caching IO manager Darrick J. Wong
2026-02-23 23:45 ` [PATCH 2/6] iocache: add the actual buffer cache Darrick J. Wong
2026-02-23 23:45 ` [PATCH 3/6] iocache: bump buffer mru priority every 50 accesses Darrick J. Wong
2026-02-23 23:45 ` [PATCH 4/6] fuse2fs: enable caching IO manager Darrick J. Wong
2026-02-23 23:45 ` [PATCH 5/6] fuse2fs: increase inode cache size Darrick J. Wong
2026-02-23 23:46 ` [PATCH 6/6] libext2fs: improve caching for inodes Darrick J. Wong
2026-02-23 23:05 ` [PATCHSET v7 6/8] fuse4fs: run servers as a contained service Darrick J. Wong
2026-02-23 23:46 ` [PATCH 1/8] libext2fs: fix MMP code to work with unixfd IO manager Darrick J. Wong
2026-02-23 23:46 ` [PATCH 2/8] fuse4fs: enable safe service mode Darrick J. Wong
2026-02-23 23:47 ` [PATCH 3/8] fuse4fs: set proc title when in fuse safe service mode Darrick J. Wong
2026-02-23 23:47 ` [PATCH 4/8] fuse4fs: upsert first file mapping to kernel on open Darrick J. Wong
2026-02-23 23:47 ` [PATCH 5/8] fuse4fs: set iomap backing device blocksize Darrick J. Wong
2026-02-23 23:47 ` [PATCH 6/8] fuse4fs: ask for loop devices when opening via fuservicemount Darrick J. Wong
2026-02-23 23:48 ` [PATCH 7/8] fuse4fs: make MMP work correctly in safe service mode Darrick J. Wong
2026-02-23 23:48 ` [PATCH 8/8] debian: update packaging for fuse4fs service Darrick J. Wong
2026-02-23 23:06 ` [PATCHSET v7 7/8] fuse4fs: reclaim buffer cache under memory pressure Darrick J. Wong
2026-02-23 23:48 ` [PATCH 1/4] libsupport: add pressure stall monitor Darrick J. Wong
2026-02-23 23:48 ` [PATCH 2/4] fuse2fs: only reclaim buffer cache when there is memory pressure Darrick J. Wong
2026-02-23 23:49 ` [PATCH 3/4] fuse4fs: enable memory pressure monitoring with service containers Darrick J. Wong
2026-02-23 23:49 ` [PATCH 4/4] fuse2fs: flush dirty metadata periodically Darrick J. Wong
2026-02-23 23:06 ` [PATCHSET RFC 8/8] fuse: allow fuse servers to upload iomap BPF programs Darrick J. Wong
2026-02-23 23:49 ` [PATCH 1/3] fuse4fs: add dynamic iomap bpf prototype which will break FIEMAP Darrick J. Wong
2026-02-23 23:49 ` [PATCH 2/3] fuse4fs: wire up caching examples to fuse iomap bpf program Darrick J. Wong
2026-02-23 23:50 ` [PATCH 3/3] fuse4fs: adjust test bpf program to deal with opaque inodes Darrick J. Wong
2026-03-16 17:56 ` [PATCHBLIZZARD v7] fuse/libfuse/e2fsprogs: containerize ext4 for safer operation Joanne Koong
2026-03-16 18:04 ` Darrick J. Wong
2026-03-16 23:08 ` Joanne Koong
2026-03-16 23:41 ` Darrick J. Wong
2026-03-17 0:20 ` Demi Marie Obenour
2026-03-17 13:59 ` Theodore Tso
2026-03-17 14:05 ` Demi Marie Obenour
2026-03-17 15:20 ` Theodore Tso
2026-03-18 21:31 ` Darrick J. Wong
2026-03-19 7:28 ` Demi Marie Obenour
2026-03-19 16:08 ` Darrick J. Wong
2026-03-20 17:04 ` Joanne Koong
2026-03-20 20:31 ` Darrick J. Wong
2026-03-17 0:10 ` Demi Marie Obenour
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Download the thread as an mbox file, import it into your mail client,
and reply-to-all from there.
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260227180756.GK13829@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=bernd@bsbernd.com \
--cc=bpf@vger.kernel.org \
--cc=joannelkoong@gmail.com \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=miklos@szeredi.hu \
--cc=neal@gompa.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox