From: Jaegeuk Kim <jaegeuk@kernel.org>
To: Nanzhe Zhao <nzzhao@126.com>
Cc: linux-f2fs@lists.sourceforge.net, linux-fsdevel@vger.kernel.org,
Matthew Wilcox <willy@infradead.org>, Chao Yu <chao@kernel.org>,
Yi Zhang <yi.zhang@huawei.com>, Barry Song <21cnbao@gmail.com>
Subject: Re: [RFC PATCH 9/9] f2fs: Enable buffered read/write path large folios support for normal and atomic file with iomap
Date: Fri, 31 Oct 2025 21:28:40 +0000 [thread overview]
Message-ID: <aQUqCEfjAXubdRQk@google.com> (raw)
In-Reply-To: <20250813092131.44762-10-nzzhao@126.com>
Hi Nanzhe,
On 08/13, Nanzhe Zhao wrote:
> This commit enables large folios support for F2FS's buffered read and
> write paths.
>
> We introduce a helper function `f2fs_set_iomap` to handle all the logic
> that converts an f2fs_map_blocks result into an iomap.
>
> Currently, compressed files, encrypted files, and fsverity are not
> supported with iomap large folios.
If we cannot support encrypted files, we'll lose a lot of the gain. Any
idea on this? And how about applying the folio->private stuff and supporting
the buffered read path on non-compressed and encrypted/plain files without
iomap conversion?
>
> Since F2FS requires `f2fs_iomap_folio_state` (or a similar equivalent
> mechanism) to correctly support the iomap framework, when
> `CONFIG_F2FS_IOMAP_FOLIO_STATE` is not enabled, we will not use the
> iomap buffered read/write paths.
>
> Note: Holes reported by f2fs_map_blocks come in two types
> (NULL_ADDR and unmapped dnodes), and they require different handling
> logic to set iomap.length, so we add a new block state flag for
> f2fs_map_blocks.
>
> Signed-off-by: Nanzhe Zhao <nzzhao@126.com>
> ---
> fs/f2fs/data.c | 286 +++++++++++++++++++++++++++++++++++++++++++----
> fs/f2fs/f2fs.h | 120 +++++++++++++-------
> fs/f2fs/file.c | 33 +++++-
> fs/f2fs/inline.c | 15 ++-
> fs/f2fs/inode.c | 27 +++++
> fs/f2fs/namei.c | 7 ++
> fs/f2fs/super.c | 3 +
> 7 files changed, 425 insertions(+), 66 deletions(-)
>
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 37eaf431ab42..243c6305b0c5 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1149,6 +1149,9 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
> {
> f2fs_set_data_blkaddr(dn, blkaddr);
> f2fs_update_read_extent_cache(dn);
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + f2fs_iomap_seq_inc(dn->inode);
> +#endif
> }
>
> /* dn->ofs_in_node will be returned with up-to-date last block pointer */
> @@ -1182,6 +1185,9 @@ int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
>
> if (folio_mark_dirty(dn->node_folio))
> dn->node_changed = true;
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + f2fs_iomap_seq_inc(dn->inode);
> +#endif
> return 0;
> }
>
> @@ -1486,6 +1492,7 @@ static int f2fs_map_no_dnode(struct inode *inode,
> *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
> if (map->m_next_extent)
> *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
> + map->m_flags |= F2FS_MAP_NODNODE;
> return 0;
> }
>
> @@ -1702,7 +1709,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
> if (blkaddr == NEW_ADDR)
> map->m_flags |= F2FS_MAP_DELALLOC;
> /* DIO READ and hole case, should not map the blocks. */
> - if (!(flag == F2FS_GET_BLOCK_DIO && is_hole && !map->m_may_create))
> + if (!(flag == F2FS_GET_BLOCK_DIO && is_hole &&
> + !map->m_may_create) &&
> + !(flag == F2FS_GET_BLOCK_IOMAP && is_hole))
> map->m_flags |= F2FS_MAP_MAPPED;
>
> map->m_pblk = blkaddr;
> @@ -1736,6 +1745,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
> goto sync_out;
>
> map->m_len += dn.ofs_in_node - ofs_in_node;
> + /* Since we successfully reserved blocks, we can update the pblk now.
> + * No need to perform inefficient look up in write_begin again
> + */
> + map->m_pblk = dn.data_blkaddr;
> if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
> err = -ENOSPC;
> goto sync_out;
> @@ -4255,9 +4268,6 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
> if (err)
> return err;
> -
> - iomap->offset = F2FS_BLK_TO_BYTES(map.m_lblk);
> -
> /*
> * When inline encryption is enabled, sometimes I/O to an encrypted file
> * has to be broken up to guarantee DUN contiguity. Handle this by
> @@ -4272,28 +4282,44 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
> return -EINVAL;
>
> - if (map.m_flags & F2FS_MAP_MAPPED) {
> - if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
> - return -EINVAL;
> -
> - iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
> - iomap->type = IOMAP_MAPPED;
> - iomap->flags |= IOMAP_F_MERGED;
> - iomap->bdev = map.m_bdev;
> - iomap->addr = F2FS_BLK_TO_BYTES(map.m_pblk);
> -
> - if (flags & IOMAP_WRITE && map.m_last_pblk)
> - iomap->private = (void *)map.m_last_pblk;
> + return f2fs_set_iomap(inode, &map, iomap, flags, offset, length, false);
> +}
> +int f2fs_set_iomap(struct inode *inode, struct f2fs_map_blocks *map,
> + struct iomap *iomap, unsigned int flags, loff_t offset,
> + loff_t length, bool dio)
> +{
> + iomap->offset = F2FS_BLK_TO_BYTES(map->m_lblk);
> + if (map->m_flags & F2FS_MAP_MAPPED) {
> + if (dio) {
> + if (WARN_ON_ONCE(map->m_pblk == NEW_ADDR))
> + return -EINVAL;
> + }
> + iomap->length = F2FS_BLK_TO_BYTES(map->m_len);
> + iomap->bdev = map->m_bdev;
> + if (map->m_pblk != NEW_ADDR) {
> + iomap->type = IOMAP_MAPPED;
> + iomap->flags |= IOMAP_F_MERGED;
> + iomap->addr = F2FS_BLK_TO_BYTES(map->m_pblk);
> + } else {
> + iomap->type = IOMAP_UNWRITTEN;
> + iomap->addr = IOMAP_NULL_ADDR;
> + }
> + if (flags & IOMAP_WRITE && map->m_last_pblk)
> + iomap->private = (void *)map->m_last_pblk;
> } else {
> - if (flags & IOMAP_WRITE)
> + if (dio && flags & IOMAP_WRITE)
> return -ENOTBLK;
>
> - if (map.m_pblk == NULL_ADDR) {
> - iomap->length = F2FS_BLK_TO_BYTES(next_pgofs) -
> - iomap->offset;
> + if (map->m_pblk == NULL_ADDR) {
> + if (map->m_flags & F2FS_MAP_NODNODE)
> + iomap->length =
> + F2FS_BLK_TO_BYTES(*map->m_next_pgofs) -
> + iomap->offset;
> + else
> + iomap->length = F2FS_BLK_TO_BYTES(map->m_len);
> iomap->type = IOMAP_HOLE;
> - } else if (map.m_pblk == NEW_ADDR) {
> - iomap->length = F2FS_BLK_TO_BYTES(map.m_len);
> + } else if (map->m_pblk == NEW_ADDR) {
> + iomap->length = F2FS_BLK_TO_BYTES(map->m_len);
> iomap->type = IOMAP_UNWRITTEN;
> } else {
> f2fs_bug_on(F2FS_I_SB(inode), 1);
> @@ -4301,7 +4327,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> iomap->addr = IOMAP_NULL_ADDR;
> }
>
> - if (map.m_flags & F2FS_MAP_NEW)
> + if (map->m_flags & F2FS_MAP_NEW)
> iomap->flags |= IOMAP_F_NEW;
> if ((inode->i_state & I_DIRTY_DATASYNC) ||
> offset + length > i_size_read(inode))
> @@ -4313,3 +4339,217 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
> const struct iomap_ops f2fs_iomap_ops = {
> .iomap_begin = f2fs_iomap_begin,
> };
> +
> +/* iomap buffered-io */
> +static int f2fs_buffered_read_iomap_begin(struct inode *inode, loff_t offset,
> + loff_t length, unsigned int flags,
> + struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + pgoff_t next_pgofs = 0;
> + int err;
> + struct f2fs_map_blocks map = {};
> +
> + map.m_lblk = F2FS_BYTES_TO_BLK(offset);
> + map.m_len = F2FS_BYTES_TO_BLK(offset + length - 1) - map.m_lblk + 1;
> + map.m_next_pgofs = &next_pgofs;
> + map.m_seg_type =
> + f2fs_rw_hint_to_seg_type(F2FS_I_SB(inode), inode->i_write_hint);
> + map.m_may_create = false;
> + if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_IS_SHUTDOWN))
> + return -EIO;
> + /*
> + * If the blocks being overwritten are already allocated,
> + * f2fs_map_lock and f2fs_balance_fs are not necessary.
> + */
> + if (flags & IOMAP_WRITE)
> + return -EINVAL;
> +
> + err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_IOMAP);
> + if (err)
> + return err;
> +
> + if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
> + return -EINVAL;
> +
> + return f2fs_set_iomap(inode, &map, iomap, flags, offset, length, false);
> +}
> +
> +const struct iomap_ops f2fs_buffered_read_iomap_ops = {
> + .iomap_begin = f2fs_buffered_read_iomap_begin,
> +};
> +
> +static void f2fs_iomap_readahead(struct readahead_control *rac)
> +{
> + struct inode *inode = rac->mapping->host;
> +
> + if (!f2fs_is_compress_backend_ready(inode))
> + return;
> +
> + /* If the file has inline data, skip readahead */
> + if (f2fs_has_inline_data(inode))
> + return;
> + iomap_readahead(rac, &f2fs_buffered_read_iomap_ops);
> +}
> +
> +static int f2fs_buffered_write_iomap_begin(struct inode *inode, loff_t offset,
> + loff_t length, unsigned flags,
> + struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_map_blocks map = {};
> + struct folio *ifolio = NULL;
> + int err = 0;
> +
> + iomap->offset = offset;
> + iomap->bdev = sbi->sb->s_bdev;
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + iomap->validity_cookie = f2fs_iomap_seq_read(inode);
> +#endif
> + if (f2fs_has_inline_data(inode)) {
> + if (offset + length <= MAX_INLINE_DATA(inode)) {
> + ifolio = f2fs_get_inode_folio(sbi, inode->i_ino);
> + if (IS_ERR(ifolio)) {
> + err = PTR_ERR(ifolio);
> + goto failed;
> + }
> + set_inode_flag(inode, FI_DATA_EXIST);
> + f2fs_iomap_prepare_read_inline(inode, ifolio, iomap,
> + offset, length);
> + if (inode->i_nlink)
> + folio_set_f2fs_inline(ifolio);
> +
> + f2fs_folio_put(ifolio, 1);
> + goto out;
> + }
> + }
> + block_t start_blk = F2FS_BYTES_TO_BLK(offset);
> + block_t len_blks =
> + F2FS_BYTES_TO_BLK(offset + length - 1) - start_blk + 1;
> + err = f2fs_map_blocks_iomap(inode, start_blk, len_blks, &map);
> + if (map.m_pblk == NULL_ADDR) {
> + err = f2fs_map_blocks_preallocate(inode, map.m_lblk, len_blks,
> + &map);
> + if (err)
> + goto failed;
> + }
> + if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
> + return -EIO; // Should not happen for buffered write prep
> + err = f2fs_set_iomap(inode, &map, iomap, flags, offset, length, false);
> + if (err)
> + return err;
> +failed:
> + f2fs_write_failed(inode, offset + length);
> +out:
> + return err;
> +}
> +
> +static int f2fs_buffered_write_atomic_iomap_begin(struct inode *inode,
> + loff_t offset, loff_t length,
> + unsigned flags,
> + struct iomap *iomap,
> + struct iomap *srcmap)
> +{
> + struct inode *cow_inode = F2FS_I(inode)->cow_inode;
> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> + struct f2fs_map_blocks map = {};
> + int err = 0;
> +
> + iomap->offset = offset;
> + iomap->bdev = sbi->sb->s_bdev;
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + iomap->validity_cookie = f2fs_iomap_seq_read(inode);
> +#endif
> + block_t start_blk = F2FS_BYTES_TO_BLK(offset);
> + block_t len_blks =
> + F2FS_BYTES_TO_BLK(offset + length - 1) - start_blk + 1;
> + err = f2fs_map_blocks_iomap(cow_inode, start_blk, len_blks, &map);
> + if (err)
> + return err;
> + if (map.m_pblk == NULL_ADDR &&
> + is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) {
> + err = f2fs_map_blocks_preallocate(cow_inode, map.m_lblk,
> + map.m_len, &map);
> + if (err)
> + return err;
> + inc_atomic_write_cnt(inode);
> + goto out;
> + } else if (map.m_pblk != NULL_ADDR) {
> + goto out;
> + }
> + err = f2fs_map_blocks_iomap(inode, start_blk, len_blks, &map);
> + if (err)
> + return err;
> +out:
> + if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
> + return -EIO;
> +
> + return f2fs_set_iomap(inode, &map, iomap, flags, offset, length, false);
> +}
> +
> +static int f2fs_buffered_write_iomap_end(struct inode *inode, loff_t pos,
> + loff_t length, ssize_t written,
> + unsigned flags, struct iomap *iomap)
> +{
> + return written;
> +}
> +
> +const struct iomap_ops f2fs_buffered_write_iomap_ops = {
> + .iomap_begin = f2fs_buffered_write_iomap_begin,
> + .iomap_end = f2fs_buffered_write_iomap_end,
> +};
> +
> +const struct iomap_ops f2fs_buffered_write_atomic_iomap_ops = {
> + .iomap_begin = f2fs_buffered_write_atomic_iomap_begin,
> +};
> +
> +const struct address_space_operations f2fs_iomap_aops = {
> + .read_folio = f2fs_read_data_folio,
> + .readahead = f2fs_iomap_readahead,
> + .write_begin = f2fs_write_begin,
> + .write_end = f2fs_write_end,
> + .writepages = f2fs_write_data_pages,
> + .dirty_folio = f2fs_dirty_data_folio,
> + .invalidate_folio = f2fs_invalidate_folio,
> + .release_folio = f2fs_release_folio,
> + .migrate_folio = filemap_migrate_folio,
> + .is_partially_uptodate = iomap_is_partially_uptodate,
> + .error_remove_folio = generic_error_remove_folio,
> +};
> +
> +static void f2fs_iomap_put_folio(struct inode *inode, loff_t pos,
> + unsigned copied, struct folio *folio)
> +{
> + if (!copied)
> + goto unlock_out;
> + if (f2fs_is_atomic_file(inode))
> + folio_set_f2fs_atomic(folio);
> +
> + if (pos + copied > i_size_read(inode) &&
> + !f2fs_verity_in_progress(inode)) {
> + if (f2fs_is_atomic_file(inode))
> + f2fs_i_size_write(F2FS_I(inode)->cow_inode,
> + pos + copied);
> + }
> +unlock_out:
> + folio_unlock(folio);
> + folio_put(folio);
> + f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
> +}
> +
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> +static bool f2fs_iomap_valid(struct inode *inode, const struct iomap *iomap)
> +{
> + return iomap->validity_cookie == f2fs_iomap_seq_read(inode);
> +}
> +#else
> +static bool f2fs_iomap_valid(struct inode *inode, const struct iomap *iomap)
> +{
> + return 1;
> +}
> +#endif
> +const struct iomap_write_ops f2fs_iomap_write_ops = {
> + .put_folio = f2fs_iomap_put_folio,
> + .iomap_valid = f2fs_iomap_valid
> +};
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index ac9a6ac13e1f..1cf12b76b09a 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -762,6 +762,7 @@ struct extent_tree_info {
> #define F2FS_MAP_NEW (1U << 0)
> #define F2FS_MAP_MAPPED (1U << 1)
> #define F2FS_MAP_DELALLOC (1U << 2)
> +#define F2FS_MAP_NODNODE (1U << 3)
> #define F2FS_MAP_FLAGS (F2FS_MAP_NEW | F2FS_MAP_MAPPED |\
> F2FS_MAP_DELALLOC)
>
> @@ -837,49 +838,53 @@ enum {
>
> /* used for f2fs_inode_info->flags */
> enum {
> - FI_NEW_INODE, /* indicate newly allocated inode */
> - FI_DIRTY_INODE, /* indicate inode is dirty or not */
> - FI_AUTO_RECOVER, /* indicate inode is recoverable */
> - FI_DIRTY_DIR, /* indicate directory has dirty pages */
> - FI_INC_LINK, /* need to increment i_nlink */
> - FI_ACL_MODE, /* indicate acl mode */
> - FI_NO_ALLOC, /* should not allocate any blocks */
> - FI_FREE_NID, /* free allocated nide */
> - FI_NO_EXTENT, /* not to use the extent cache */
> - FI_INLINE_XATTR, /* used for inline xattr */
> - FI_INLINE_DATA, /* used for inline data*/
> - FI_INLINE_DENTRY, /* used for inline dentry */
> - FI_APPEND_WRITE, /* inode has appended data */
> - FI_UPDATE_WRITE, /* inode has in-place-update data */
> - FI_NEED_IPU, /* used for ipu per file */
> - FI_ATOMIC_FILE, /* indicate atomic file */
> - FI_DATA_EXIST, /* indicate data exists */
> - FI_SKIP_WRITES, /* should skip data page writeback */
> - FI_OPU_WRITE, /* used for opu per file */
> - FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> - FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
> - FI_HOT_DATA, /* indicate file is hot */
> - FI_EXTRA_ATTR, /* indicate file has extra attribute */
> - FI_PROJ_INHERIT, /* indicate file inherits projectid */
> - FI_PIN_FILE, /* indicate file should not be gced */
> - FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
> - FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
> - FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */
> - FI_MMAP_FILE, /* indicate file was mmapped */
> - FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */
> - FI_COMPRESS_RELEASED, /* compressed blocks were released */
> - FI_ALIGNED_WRITE, /* enable aligned write */
> - FI_COW_FILE, /* indicate COW file */
> - FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */
> - FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */
> - FI_ATOMIC_REPLACE, /* indicate atomic replace */
> - FI_OPENED_FILE, /* indicate file has been opened */
> - FI_DONATE_FINISHED, /* indicate page donation of file has been finished */
> - FI_MAX, /* max flag, never be used */
> + FI_NEW_INODE, /* indicate newly allocated inode */
> + FI_DIRTY_INODE, /* indicate inode is dirty or not */
> + FI_AUTO_RECOVER, /* indicate inode is recoverable */
> + FI_DIRTY_DIR, /* indicate directory has dirty pages */
> + FI_INC_LINK, /* need to increment i_nlink */
> + FI_ACL_MODE, /* indicate acl mode */
> + FI_NO_ALLOC, /* should not allocate any blocks */
> + FI_FREE_NID, /* free allocated nide */
> + FI_NO_EXTENT, /* not to use the extent cache */
> + FI_INLINE_XATTR, /* used for inline xattr */
> + FI_INLINE_DATA, /* used for inline data*/
> + FI_INLINE_DENTRY, /* used for inline dentry */
> + FI_APPEND_WRITE, /* inode has appended data */
> + FI_UPDATE_WRITE, /* inode has in-place-update data */
> + FI_NEED_IPU, /* used for ipu per file */
> + FI_ATOMIC_FILE, /* indicate atomic file */
> + FI_DATA_EXIST, /* indicate data exists */
> + FI_SKIP_WRITES, /* should skip data page writeback */
> + FI_OPU_WRITE, /* used for opu per file */
> + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */
> + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */
> + FI_HOT_DATA, /* indicate file is hot */
> + FI_EXTRA_ATTR, /* indicate file has extra attribute */
> + FI_PROJ_INHERIT, /* indicate file inherits projectid */
> + FI_PIN_FILE, /* indicate file should not be gced */
> + FI_VERITY_IN_PROGRESS, /* building fs-verity Merkle tree */
> + FI_COMPRESSED_FILE, /* indicate file's data can be compressed */
> + FI_COMPRESS_CORRUPT, /* indicate compressed cluster is corrupted */
> + FI_MMAP_FILE, /* indicate file was mmapped */
> + FI_ENABLE_COMPRESS, /* enable compression in "user" compression mode */
> + FI_COMPRESS_RELEASED, /* compressed blocks were released */
> + FI_ALIGNED_WRITE, /* enable aligned write */
> + FI_COW_FILE, /* indicate COW file */
> + FI_ATOMIC_COMMITTED, /* indicate atomic commit completed except disk sync */
> + FI_ATOMIC_DIRTIED, /* indicate atomic file is dirtied */
> + FI_ATOMIC_REPLACE, /* indicate atomic replace */
> + FI_OPENED_FILE, /* indicate file has been opened */
> + FI_DONATE_FINISHED, /* indicate page donation of file has been finished */
> + FI_IOMAP, /* indicate whether this inode should enable iomap*/
> + FI_MAX, /* max flag, never be used */
> };
>
> struct f2fs_inode_info {
> struct inode vfs_inode; /* serve a vfs inode */
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + atomic64_t i_iomap_seq; /* for iomap_valid sequence number */
> +#endif
> unsigned long i_flags; /* keep an inode flags for ioctl */
> unsigned char i_advise; /* use to give file attribute hints */
> unsigned char i_dir_level; /* use for dentry level for large dir */
> @@ -2814,6 +2819,16 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
> set_sbi_flag(sbi, SBI_IS_DIRTY);
> }
>
> +static inline void inc_page_count_multiple(struct f2fs_sb_info *sbi,
> + int count_type, int npages)
> +{
> + atomic_add(npages, &sbi->nr_pages[count_type]);
> +
> + if (count_type == F2FS_DIRTY_DENTS || count_type == F2FS_DIRTY_NODES ||
> + count_type == F2FS_DIRTY_META || count_type == F2FS_DIRTY_QDATA ||
> + count_type == F2FS_DIRTY_IMETA)
> + set_sbi_flag(sbi, SBI_IS_DIRTY);
> +}
> static inline void inode_inc_dirty_pages(struct inode *inode)
> {
> atomic_inc(&F2FS_I(inode)->dirty_pages);
> @@ -3657,6 +3672,10 @@ static inline bool f2fs_is_cow_file(struct inode *inode)
> return is_inode_flag_set(inode, FI_COW_FILE);
> }
>
> +static inline bool f2fs_iomap_inode(struct inode *inode)
> +{
> + return is_inode_flag_set(inode, FI_IOMAP);
> +}
> static inline void *inline_data_addr(struct inode *inode, struct folio *folio)
> {
> __le32 *addr = get_dnode_addr(inode, folio);
> @@ -3880,7 +3899,17 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc);
> void f2fs_remove_donate_inode(struct inode *inode);
> void f2fs_evict_inode(struct inode *inode);
> void f2fs_handle_failed_inode(struct inode *inode);
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> +static inline void f2fs_iomap_seq_inc(struct inode *inode)
> +{
> + atomic64_inc(&F2FS_I(inode)->i_iomap_seq);
> +}
>
> +static inline u64 f2fs_iomap_seq_read(struct inode *inode)
> +{
> + return atomic64_read(&F2FS_I(inode)->i_iomap_seq);
> +}
> +#endif
> /*
> * namei.c
> */
> @@ -4248,6 +4277,9 @@ int f2fs_write_single_data_page(struct folio *folio, int *submitted,
> enum iostat_type io_type,
> int compr_blocks, bool allow_balance);
> void f2fs_write_failed(struct inode *inode, loff_t to);
> +int f2fs_set_iomap(struct inode *inode, struct f2fs_map_blocks *map,
> + struct iomap *iomap, unsigned int flags, loff_t offset,
> + loff_t length, bool dio);
> void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
> bool f2fs_release_folio(struct folio *folio, gfp_t wait);
> bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len);
> @@ -4258,6 +4290,11 @@ int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi);
> void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi);
> extern const struct iomap_ops f2fs_iomap_ops;
>
> +extern const struct iomap_write_ops f2fs_iomap_write_ops;
> +extern const struct iomap_ops f2fs_buffered_read_iomap_ops;
> +extern const struct iomap_ops f2fs_buffered_write_iomap_ops;
> +extern const struct iomap_ops f2fs_buffered_write_atomic_iomap_ops;
> +
> /*
> * gc.c
> */
> @@ -4540,6 +4577,7 @@ extern const struct file_operations f2fs_dir_operations;
> extern const struct file_operations f2fs_file_operations;
> extern const struct inode_operations f2fs_file_inode_operations;
> extern const struct address_space_operations f2fs_dblock_aops;
> +extern const struct address_space_operations f2fs_iomap_aops;
> extern const struct address_space_operations f2fs_node_aops;
> extern const struct address_space_operations f2fs_meta_aops;
> extern const struct inode_operations f2fs_dir_inode_operations;
> @@ -4578,7 +4616,9 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
> int f2fs_inline_data_fiemap(struct inode *inode,
> struct fiemap_extent_info *fieinfo,
> __u64 start, __u64 len);
> -
> +void f2fs_iomap_prepare_read_inline(struct inode *inode, struct folio *ifolio,
> + struct iomap *iomap, loff_t pos,
> + loff_t length);
> /*
> * shrinker.c
> */
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 42faaed6a02d..6c5b3e632f2b 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -4965,7 +4965,14 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> if (ret)
> return ret;
> }
> -
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + /* Buffered write can convert inline file to large normal file
> + * when convert success, we uses mapping set large folios here
> + */
> + if (f2fs_should_use_buffered_iomap(inode))
> + mapping_set_large_folios(inode->i_mapping);
> + set_inode_flag(inode, FI_IOMAP);
> +#endif
> /* Do not preallocate blocks that will be written partially in 4KB. */
> map.m_lblk = F2FS_BLK_ALIGN(pos);
> map.m_len = F2FS_BYTES_TO_BLK(pos + count);
> @@ -4994,6 +5001,24 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> return map.m_len;
> }
>
> +static ssize_t f2fs_iomap_buffered_write(struct kiocb *iocb, struct iov_iter *i)
> +{
> + struct file *file = iocb->ki_filp;
> + struct inode *inode = file_inode(file);
> + ssize_t ret;
> +
> + if (f2fs_is_atomic_file(inode)) {
> + ret = iomap_file_buffered_write(iocb, i,
> + &f2fs_buffered_write_atomic_iomap_ops,
> + &f2fs_iomap_write_ops, NULL);
> + } else {
> + ret = iomap_file_buffered_write(iocb, i,
> + &f2fs_buffered_write_iomap_ops,
> + &f2fs_iomap_write_ops, NULL);
> + }
> + return ret;
> +}
> +
> static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
> struct iov_iter *from)
> {
> @@ -5004,7 +5029,11 @@ static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb,
> if (iocb->ki_flags & IOCB_NOWAIT)
> return -EOPNOTSUPP;
>
> - ret = generic_perform_write(iocb, from);
> + if (f2fs_iomap_inode(inode)) {
> + ret = f2fs_iomap_buffered_write(iocb, from);
> + } else {
> + ret = generic_perform_write(iocb, from);
> + }
>
> if (ret > 0) {
> f2fs_update_iostat(F2FS_I_SB(inode), inode,
> diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
> index 58ac831ef704..bda338b4fc22 100644
> --- a/fs/f2fs/inline.c
> +++ b/fs/f2fs/inline.c
> @@ -13,7 +13,7 @@
> #include "f2fs.h"
> #include "node.h"
> #include <trace/events/f2fs.h>
> -
> +#include <linux/iomap.h>
> static bool support_inline_data(struct inode *inode)
> {
> if (f2fs_used_in_atomic_write(inode))
> @@ -832,3 +832,16 @@ int f2fs_inline_data_fiemap(struct inode *inode,
> f2fs_folio_put(ifolio, true);
> return err;
> }
> +/* fill iomap struct for inline data case for
> + *iomap buffered write
> + */
> +void f2fs_iomap_prepare_read_inline(struct inode *inode, struct folio *ifolio,
> + struct iomap *iomap, loff_t pos,
> + loff_t length)
> +{
> + iomap->addr = IOMAP_NULL_ADDR;
> + iomap->length = length;
> + iomap->type = IOMAP_INLINE;
> + iomap->flags = 0;
> + iomap->inline_data = inline_data_addr(inode, ifolio);
> +}
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 8c4eafe9ffac..29378270d561 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -23,6 +23,24 @@
> extern const struct address_space_operations f2fs_compress_aops;
> #endif
>
> +bool f2fs_should_use_buffered_iomap(struct inode *inode)
> +{
> + if (!S_ISREG(inode->i_mode))
> + return false;
> + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
> + return false;
> + if (inode->i_mapping == NODE_MAPPING(F2FS_I_SB(inode)))
> + return false;
> + if (inode->i_mapping == META_MAPPING(F2FS_I_SB(inode)))
> + return false;
> + if (f2fs_encrypted_file(inode))
> + return false;
> + if (fsverity_active(inode))
> + return false;
> + if (f2fs_compressed_file(inode))
> + return false;
> + return true;
> +}
> void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync)
> {
> if (is_inode_flag_set(inode, FI_NEW_INODE))
> @@ -611,7 +629,16 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino)
> } else if (S_ISREG(inode->i_mode)) {
> inode->i_op = &f2fs_file_inode_operations;
> inode->i_fop = &f2fs_file_operations;
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + if (f2fs_should_use_buffered_iomap(inode)) {
> + mapping_set_large_folios(inode->i_mapping);
> + set_inode_flag(inode, FI_IOMAP);
> + inode->i_mapping->a_ops = &f2fs_iomap_aops;
> + } else
> + inode->i_mapping->a_ops = &f2fs_dblock_aops;
> +#else
> inode->i_mapping->a_ops = &f2fs_dblock_aops;
> +#endif
> } else if (S_ISDIR(inode->i_mode)) {
> inode->i_op = &f2fs_dir_inode_operations;
> inode->i_fop = &f2fs_dir_operations;
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index b882771e4699..2d995860c488 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -328,6 +328,13 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
> f2fs_init_extent_tree(inode);
>
> trace_f2fs_new_inode(inode, 0);
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + if (f2fs_should_use_buffered_iomap(inode)) {
> + set_inode_flag(inode, FI_IOMAP);
> + mapping_set_large_folios(inode->i_mapping);
> + }
> +#endif
> +
> return inode;
>
> fail:
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 2000880b7dca..35a42d6214fe 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1719,6 +1719,9 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
> init_once((void *) fi);
>
> /* Initialize f2fs-specific inode info */
> +#ifdef CONFIG_F2FS_IOMAP_FOLIO_STATE
> + atomic64_set(&fi->i_iomap_seq, 0);
> +#endif
> atomic_set(&fi->dirty_pages, 0);
> atomic_set(&fi->i_compr_blocks, 0);
> atomic_set(&fi->open_count, 0);
> --
> 2.34.1
>
next prev parent reply other threads:[~2025-10-31 21:28 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-13 9:21 [f2fs-dev] [RFC PATCH 0/9] f2fs: Enable buffered read/write large folios support with extended iomap Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 1/9] f2fs: Introduce f2fs_iomap_folio_state Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 2/9] f2fs: Integrate f2fs_iomap_folio_state into f2fs page private helpers Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 3/9] f2fs: Using `folio_detach_f2fs_private` in invalidate and release folio Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 4/9] f2fs: Convert outplace write path page private funcions to folio private functions Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 5/9] f2fs:Refactor `f2fs_is_compressed_page` to `f2fs_is_compressed_folio` Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 6/9] f2fs: Extend f2fs_io_info to support sub-folio ranges Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 7/9] f2fs:Make GC aware of large folios Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 8/9] f2fs: Introduce F2FS_GET_BLOCK_IOMAP and map_blocks helpers Nanzhe Zhao
2025-08-13 9:21 ` [RFC PATCH 9/9] f2fs: Enable buffered read/write path large folios support for normal and atomic file with iomap Nanzhe Zhao
2025-10-31 21:28 ` Jaegeuk Kim [this message]
2025-11-04 6:31 ` Nanzhe Zhao
2025-11-07 17:39 ` Jaegeuk Kim
2025-08-13 15:22 ` [f2fs-dev] [RFC PATCH 0/9] f2fs: Enable buffered read/write large folios support with extended iomap Christoph Hellwig
2025-08-14 0:39 ` 赵南哲
2025-08-17 4:43 ` Nanzhe Zhao
-- strict thread matches above, loose matches on Subject: below --
2025-08-13 9:37 [f2fs-dev] [RESEND RFC " Nanzhe Zhao
2025-08-13 9:37 ` [RFC PATCH 9/9] f2fs: Enable buffered read/write path large folios support for normal and atomic file with iomap Nanzhe Zhao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=aQUqCEfjAXubdRQk@google.com \
--to=jaegeuk@kernel.org \
--cc=21cnbao@gmail.com \
--cc=chao@kernel.org \
--cc=linux-f2fs@lists.sourceforge.net \
--cc=linux-fsdevel@vger.kernel.org \
--cc=nzzhao@126.com \
--cc=willy@infradead.org \
--cc=yi.zhang@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).