From: Jan Kara <jack@suse.cz>
To: <linux-fsdevel@vger.kernel.org>
Cc: Christian Brauner <brauner@kernel.org>,
aivazian.tigran@gmail.com, Ted Tso <tytso@mit.edu>,
<linux-ext4@vger.kernel.org>,
OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>,
Jan Kara <jack@suse.cz>
Subject: [PATCH v2 02/10] ext4: Allocate mapping_metadata_bhs struct on demand
Date: Mon, 25 May 2026 10:58:08 +0200 [thread overview]
Message-ID: <20260525085821.769119-12-jack@suse.cz> (raw)
In-Reply-To: <20260525085035.12891-1-jack@suse.cz>
Currently every ext4 inode gets a mapping_metadata_bhs struct although it
is only needed when running without a journal and only for inodes where
any metadata was dirtied. Allocate the mapping_metadata_bhs struct on demand
when dirtying the first metadata buffer for the inode.
Signed-off-by: Jan Kara <jack@suse.cz>
---
fs/ext4/ext4.h | 2 +-
fs/ext4/ext4_jbd2.c | 24 +++++++++++++++++++++---
fs/ext4/fsync.c | 12 ++++++++----
fs/ext4/inode.c | 9 +++++----
fs/ext4/super.c | 7 ++++---
5 files changed, 39 insertions(+), 15 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 94283a991e5c..6bb29a20420f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1117,7 +1117,7 @@ struct ext4_inode_info {
struct rw_semaphore i_data_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
- struct mapping_metadata_bhs i_metadata_bhs;
+ struct mapping_metadata_bhs *i_metadata_bhs;
/*
* File creation time. Its function is same as that of
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 9a8c225f2753..752326f3b653 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -350,6 +350,21 @@ int __ext4_journal_get_create_access(const char *where, unsigned int line,
return 0;
}
+static void ext4_inode_attach_mmb(struct inode *inode)
+{
+ struct mapping_metadata_bhs *mmb;
+
+ /*
+ * Allocate the inode's mmb on demand. Failure while dirtying a buffer is
+ * hard to handle without corrupting the fs, hence __GFP_NOFAIL.
+ */
+ mmb = kmalloc_obj(*mmb, GFP_NOFS | __GFP_NOFAIL);
+ mmb_init(mmb, &inode->i_data);
+ /* Someone else attached an mmb first? Then free our unused copy. */
+ if (cmpxchg(&EXT4_I(inode)->i_metadata_bhs, NULL, mmb))
+ kfree(mmb);
+}
+
int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
handle_t *handle, struct inode *inode,
struct buffer_head *bh)
@@ -389,11 +404,14 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
err);
}
} else {
- if (inode)
+ if (inode) {
+ if (!EXT4_I(inode)->i_metadata_bhs)
+ ext4_inode_attach_mmb(inode);
mmb_mark_buffer_dirty(bh,
- &EXT4_I(inode)->i_metadata_bhs);
- else
+ EXT4_I(inode)->i_metadata_bhs);
+ } else {
mark_buffer_dirty(bh);
+ }
if (inode && inode_needs_sync(inode)) {
sync_dirty_buffer(bh);
if (buffer_req(bh) && !buffer_uptodate(bh)) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 924726dcc85f..c104f55a0242 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -46,6 +46,7 @@
static int ext4_sync_parent(struct inode *inode)
{
struct dentry *dentry, *next;
+ struct mapping_metadata_bhs *mmb;
int ret = 0;
if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY))
@@ -68,9 +69,12 @@ static int ext4_sync_parent(struct inode *inode)
* through ext4_evict_inode()) and so we are safe to flush
* metadata blocks and the inode.
*/
- ret = mmb_sync(&EXT4_I(inode)->i_metadata_bhs);
- if (ret)
- break;
+ mmb = READ_ONCE(EXT4_I(inode)->i_metadata_bhs);
+ if (mmb) {
+ ret = mmb_sync(mmb);
+ if (ret)
+ break;
+ }
ret = sync_inode_metadata(inode, 1);
if (ret)
break;
@@ -89,7 +93,7 @@ static int ext4_fsync_nojournal(struct file *file, loff_t start, loff_t end,
};
int ret;
- ret = mmb_fsync_noflush(file, &EXT4_I(inode)->i_metadata_bhs,
+ ret = mmb_fsync_noflush(file, READ_ONCE(EXT4_I(inode)->i_metadata_bhs),
start, end, datasync);
if (ret)
return ret;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c2c2d6ac7f3d..3e66e9510909 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -195,9 +195,8 @@ void ext4_evict_inode(struct inode *inode)
ext4_warning_inode(inode, "data will be lost");
truncate_inode_pages_final(&inode->i_data);
- /* Avoid mballoc special inode which has no proper iops */
- if (!EXT4_SB(inode->i_sb)->s_journal)
- mmb_sync(&EXT4_I(inode)->i_metadata_bhs);
+ if (EXT4_I(inode)->i_metadata_bhs)
+ mmb_sync(EXT4_I(inode)->i_metadata_bhs);
goto no_delete;
}
@@ -3451,6 +3450,7 @@ static bool ext4_release_folio(struct folio *folio, gfp_t wait)
static bool ext4_inode_datasync_dirty(struct inode *inode)
{
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
+ struct mapping_metadata_bhs *mmb;
if (journal) {
if (jbd2_transaction_committed(journal,
@@ -3461,8 +3461,9 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
return true;
}
+ mmb = READ_ONCE(EXT4_I(inode)->i_metadata_bhs);
/* Any metadata buffers to write? */
- if (mmb_has_buffers(&EXT4_I(inode)->i_metadata_bhs))
+ if (mmb && mmb_has_buffers(mmb))
return true;
return inode_state_read_once(inode) & I_DIRTY_DATASYNC;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6a77db4d3124..7fc2cff708cc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1430,7 +1430,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
ext4_fc_init_inode(&ei->vfs_inode);
spin_lock_init(&ei->i_fc_lock);
- mmb_init(&ei->i_metadata_bhs, &ei->vfs_inode.i_data);
+ ei->i_metadata_bhs = NULL;
return &ei->vfs_inode;
}
@@ -1448,6 +1448,7 @@ static int ext4_drop_inode(struct inode *inode)
static void ext4_free_in_core_inode(struct inode *inode)
{
fscrypt_free_inode(inode);
+ kfree(EXT4_I(inode)->i_metadata_bhs);
if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
pr_warn("%s: inode %llu still in fc list",
__func__, inode->i_ino);
@@ -1527,8 +1528,8 @@ static void destroy_inodecache(void)
void ext4_clear_inode(struct inode *inode)
{
ext4_fc_del(inode);
- if (!EXT4_SB(inode->i_sb)->s_journal)
- mmb_invalidate(&EXT4_I(inode)->i_metadata_bhs);
+ if (EXT4_I(inode)->i_metadata_bhs)
+ mmb_invalidate(EXT4_I(inode)->i_metadata_bhs);
clear_inode(inode);
ext4_discard_preallocations(inode);
/*
--
2.51.0
next prev parent reply other threads:[~2026-05-25 8:58 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-25 8:58 [PATCH v2 0/10] fs: Fix missed inode write during fsync Jan Kara
2026-05-25 8:58 ` [PATCH v2 01/10] affs: Drop support for metadata bh tracking Jan Kara
2026-05-25 10:06 ` David Sterba
2026-05-25 8:58 ` Jan Kara [this message]
2026-06-03 13:41 ` [PATCH v2 02/10] ext4: Allocate mapping_metadata_bhs struct on demand Theodore Tso
2026-05-25 8:58 ` [PATCH v2 03/10] fs: Writeout inode buffer from mmb_sync() Jan Kara
2026-05-25 8:58 ` [PATCH v2 04/10] ext2: Fix possibly missing inode write on fsync(2) Jan Kara
2026-05-25 8:58 ` [PATCH v2 05/10] udf: " Jan Kara
2026-05-25 8:58 ` [PATCH v2 06/10] fat: " Jan Kara
2026-05-25 8:58 ` [PATCH v2 07/10] minix: " Jan Kara
2026-05-25 8:58 ` [PATCH v2 08/10] bfs: " Jan Kara
2026-05-25 8:58 ` [PATCH v2 09/10] ext4: Use mmb infrastructure for inode buffer writeout Jan Kara
2026-06-03 13:41 ` Theodore Tso
2026-05-25 8:58 ` [PATCH v2 10/10] fs: Fix missed inode writeback when racing with __writeback_single_inode Jan Kara
2026-06-02 7:22 ` [PATCH v2 0/10] fs: Fix missed inode write during fsync Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260525085821.769119-12-jack@suse.cz \
--to=jack@suse.cz \
--cc=aivazian.tigran@gmail.com \
--cc=brauner@kernel.org \
--cc=hirofumi@mail.parknet.co.jp \
--cc=linux-ext4@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.