From: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
To: linux-ext4@vger.kernel.org
Cc: tytso@mit.edu, jack@suse.cz, harshads@google.com,
Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Subject: [PATCH v7 4/9] ext4: rework fast commit commit path
Date: Sun, 18 Aug 2024 04:03:51 +0000 [thread overview]
Message-ID: <20240818040356.241684-6-harshadshirwadkar@gmail.com> (raw)
In-Reply-To: <20240818040356.241684-1-harshadshirwadkar@gmail.com>
This patch reworks fast commit's commit path so that the journal is no
longer locked for the entire duration of a fast commit. Instead, we only lock
the journal while marking all the eligible inodes as "committing". This
allows handles to make progress in parallel with the fast commit.
Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
---
fs/ext4/fast_commit.c | 74 ++++++++++++++++++++++++++++---------------
fs/jbd2/journal.c | 2 --
2 files changed, 49 insertions(+), 27 deletions(-)
diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
index dfa999913..7a35234ce 100644
--- a/fs/ext4/fast_commit.c
+++ b/fs/ext4/fast_commit.c
@@ -291,20 +291,30 @@ void ext4_fc_del(struct inode *inode)
if (ext4_fc_disabled(inode->i_sb))
return;
-restart:
spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
if (list_empty(&ei->i_fc_list) && list_empty(&ei->i_fc_dilist)) {
spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
return;
}
- if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
- ext4_fc_wait_committing_inode(inode);
- goto restart;
- }
-
- if (!list_empty(&ei->i_fc_list))
- list_del_init(&ei->i_fc_list);
+ /*
+ * Since ext4_fc_del is called from ext4_evict_inode while having a
+ * handle open, there is no need for us to wait here even if a fast
+ * commit is going on. That is because, if this inode is being
+ * committed, ext4_mark_inode_dirty would have waited for inode commit
+ * operation to finish before we come here. So, by the time we come
+ * here, inode's EXT4_STATE_FC_COMMITTING would have been cleared. So,
+ * we shouldn't see EXT4_STATE_FC_COMMITTING to be set on this inode
+ * here.
+ *
+ * We may come here without any handles open in the "no_delete" case of
+ * ext4_evict_inode as well. However, if that happens, we first mark the
+ * file system as fast commit ineligible anyway. So, even in that case,
+ * it is okay to remove the inode from the fc list.
+ */
+ WARN_ON(ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)
+ && !ext4_test_mount_flag(inode->i_sb, EXT4_MF_FC_INELIGIBLE));
+ list_del_init(&ei->i_fc_list);
/*
* Since this inode is getting removed, let's also remove all FC
@@ -327,8 +337,6 @@ void ext4_fc_del(struct inode *inode)
fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
kfree(fc_dentry->fcd_name.name);
kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
-
- return;
}
/*
@@ -1004,19 +1012,6 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
spin_lock(&sbi->s_fc_lock);
list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
- ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
- while (atomic_read(&ei->i_fc_updates)) {
- DEFINE_WAIT(wait);
-
- prepare_to_wait(&ei->i_fc_wait, &wait,
- TASK_UNINTERRUPTIBLE);
- if (atomic_read(&ei->i_fc_updates)) {
- spin_unlock(&sbi->s_fc_lock);
- schedule();
- spin_lock(&sbi->s_fc_lock);
- }
- finish_wait(&ei->i_fc_wait, &wait);
- }
spin_unlock(&sbi->s_fc_lock);
ret = jbd2_submit_inode_data(journal, ei->jinode);
if (ret)
@@ -1129,6 +1124,19 @@ static int ext4_fc_perform_commit(journal_t *journal)
int ret = 0;
u32 crc = 0;
+ /*
+ * Wait for all the handles of the current transaction to complete
+ * and then lock the journal.
+ */
+ jbd2_journal_lock_updates(journal);
+ spin_lock(&sbi->s_fc_lock);
+ list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
+ ext4_set_inode_state(&iter->vfs_inode,
+ EXT4_STATE_FC_COMMITTING);
+ }
+ spin_unlock(&sbi->s_fc_lock);
+ jbd2_journal_unlock_updates(journal);
+
ret = ext4_fc_submit_inode_data_all(journal);
if (ret)
return ret;
@@ -1179,6 +1187,18 @@ static int ext4_fc_perform_commit(journal_t *journal)
ret = ext4_fc_write_inode(inode, &crc);
if (ret)
goto out;
+ ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
+ /*
+ * Make sure clearing of EXT4_STATE_FC_COMMITTING is
+ * visible before we send the wakeup. Pairs with implicit
+ * barrier in prepare_to_wait() in ext4_fc_track_inode().
+ */
+ smp_mb();
+#if (BITS_PER_LONG < 64)
+ wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
+#else
+ wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
+#endif
spin_lock(&sbi->s_fc_lock);
}
spin_unlock(&sbi->s_fc_lock);
@@ -1316,13 +1336,17 @@ static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
spin_lock(&sbi->s_fc_lock);
list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
i_fc_list) {
- list_del_init(&iter->i_fc_list);
ext4_clear_inode_state(&iter->vfs_inode,
EXT4_STATE_FC_COMMITTING);
if (tid_geq(tid, iter->i_sync_tid))
ext4_fc_reset_inode(&iter->vfs_inode);
- /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
+ /*
+ * Make sure clearing of EXT4_STATE_FC_COMMITTING is
+ * visible before we send the wakeup. Pairs with implicit
+ * barrier in prepare_to_wait() in ext4_fc_track_inode().
+ */
smp_mb();
+ list_del_init(&iter->i_fc_list);
#if (BITS_PER_LONG < 64)
wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
#else
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 1ebf2393b..ecd70b506 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -728,7 +728,6 @@ int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
}
journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
write_unlock(&journal->j_state_lock);
- jbd2_journal_lock_updates(journal);
return 0;
}
@@ -740,7 +739,6 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
*/
static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
{
- jbd2_journal_unlock_updates(journal);
if (journal->j_fc_cleanup_callback)
journal->j_fc_cleanup_callback(journal, 0, tid);
write_lock(&journal->j_state_lock);
--
2.46.0.184.g6999bdac58-goog
next prev parent reply other threads:[~2024-08-18 4:04 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-08-18 4:03 [PATCH] ext4: convert i_fc_lock to spinlock Harshad Shirwadkar
2024-08-18 4:03 ` [PATCH v7 0/9] Ext4 Fast Commit Performance Patchset Harshad Shirwadkar
2024-08-18 4:49 ` harshad shirwadkar
2024-08-18 4:03 ` [PATCH v7 1/9] ext4: convert i_fc_lock to spinlock Harshad Shirwadkar
2024-08-18 4:03 ` [PATCH v7 2/9] ext4: for committing inode, make ext4_fc_track_inode wait Harshad Shirwadkar
2024-12-12 22:00 ` Jan Kara
2024-12-13 15:10 ` Jan Kara
2025-04-14 16:57 ` harshad shirwadkar
2024-08-18 4:03 ` [PATCH v7 3/9] ext4: mark inode dirty before grabbing i_data_sem in ext4_setattr Harshad Shirwadkar
2024-12-12 21:57 ` Jan Kara
2024-08-18 4:03 ` Harshad Shirwadkar [this message]
2024-12-13 15:12 ` [PATCH v7 4/9] ext4: rework fast commit commit path Jan Kara
2024-08-18 4:03 ` [PATCH v7 5/9] ext4: drop i_fc_updates from inode fc info Harshad Shirwadkar
2024-08-18 4:03 ` [PATCH v7 6/9] ext4: update code documentation Harshad Shirwadkar
2024-12-13 15:15 ` Jan Kara
2024-08-18 4:03 ` [PATCH v7 7/9] ext4: temporarily elevate commit thread priority Harshad Shirwadkar
2024-08-18 4:03 ` [PATCH v7 8/9] ext4: make fast commit ineligible on ext4_reserve_inode_write failure Harshad Shirwadkar
2024-12-16 10:40 ` Jan Kara
2024-08-18 4:03 ` [PATCH v7 9/9] ext4: hold s_fc_lock while during fast commit Harshad Shirwadkar
2024-12-16 10:50 ` Jan Kara
2025-01-13 14:16 ` Baokun Li
2025-04-14 16:59 ` harshad shirwadkar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240818040356.241684-6-harshadshirwadkar@gmail.com \
--to=harshadshirwadkar@gmail.com \
--cc=harshads@google.com \
--cc=jack@suse.cz \
--cc=linux-ext4@vger.kernel.org \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.