From: Theodore Ts'o <tytso@mit.edu>
To: Ext4 Developers List <linux-ext4@vger.kernel.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Subject: [PATCH] ext4: Add support for data=alloc_on_commit mode
Date: Mon, 16 Mar 2009 23:19:58 -0400 [thread overview]
Message-ID: <1237259998-12656-1-git-send-email-tytso@mit.edu> (raw)
Add an ext3 bug-for-bug compatible analogue for data=ordered mode. In
this mode, we force all delayed allocation blocks involved with the
to-be-committed transaction to be allocated, and then flushed out to
disk before the transaction is committed.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
fs/ext4/ext4.h | 6 +++-
fs/ext4/ext4_jbd2.h | 3 +-
fs/ext4/inode.c | 12 +++++++++++
fs/ext4/super.c | 51 ++++++++++++++++++++++++++++++++++++-------------
fs/jbd2/commit.c | 3 ++
include/linux/jbd2.h | 2 +
6 files changed, 60 insertions(+), 17 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ebd1a50..b15b03e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -541,8 +541,9 @@ do { \
#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
+#define EXT4_MOUNT_ORDERED_DATA 0x00000 /* Flush data before commit */
#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
-#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
+#define EXT4_MOUNT_ALLOC_COMMIT_DATA 0x00800 /* Alloc data on commit */
#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
@@ -820,10 +821,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_DEFM_XATTR_USER 0x0004
#define EXT4_DEFM_ACL 0x0008
#define EXT4_DEFM_UID16 0x0010
-#define EXT4_DEFM_JMODE 0x0060
+#define EXT4_DEFM_JMODE 0x00E0
#define EXT4_DEFM_JMODE_DATA 0x0020
#define EXT4_DEFM_JMODE_ORDERED 0x0040
#define EXT4_DEFM_JMODE_WBACK 0x0060
+#define EXT4_DEFM_JMODE_ALLOC_COMMIT 0x00C0
/*
* Default journal batch times
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index be2f426..0453671 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -274,7 +274,8 @@ static inline int ext4_should_order_data(struct inode *inode)
return 0;
if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
return 0;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ if ((test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) ||
+ (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ALLOC_COMMIT_DATA))
return 1;
return 0;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b58e7e2..ba0112b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2754,6 +2754,17 @@ static int ext4_da_write_end(struct file *file,
"dev %s ino %lu pos %llu len %u copied %u",
inode->i_sb->s_id, inode->i_ino,
(unsigned long long) pos, len, copied);
+
+ if (test_opt(inode->i_sb, DATA_FLAGS) ==
+ EXT4_MOUNT_ALLOC_COMMIT_DATA) {
+ ret = ext4_jbd2_file_inode(handle, inode);
+ if (ret)
+ goto errout;
+ ret = ext4_mark_inode_dirty(handle, inode);
+ if (ret)
+ goto errout;
+ }
+
start = pos & (PAGE_CACHE_SIZE - 1);
end = start + copied - 1;
@@ -2791,6 +2802,7 @@ static int ext4_da_write_end(struct file *file,
copied = ret2;
if (ret2 < 0)
ret = ret2;
+errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3f32fb2..93e1bf9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -67,7 +67,7 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static void ext4_write_super(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
-
+static void alloc_on_commit_callback(journal_t *journal);
ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
struct ext4_group_desc *bg)
@@ -849,6 +849,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",data=ordered");
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
seq_puts(seq, ",data=writeback");
+ else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ALLOC_COMMIT_DATA)
+ seq_puts(seq, ",data=alloc_on_commit");
if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
seq_printf(seq, ",inode_readahead_blks=%u",
@@ -1012,7 +1014,7 @@ enum {
Opt_journal_update, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
- Opt_data_err_abort, Opt_data_err_ignore,
+ Opt_data_alloc_on_commit, Opt_data_err_abort, Opt_data_err_ignore,
Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
@@ -1056,6 +1058,7 @@ static const match_table_t tokens = {
{Opt_data_journal, "data=journal"},
{Opt_data_ordered, "data=ordered"},
{Opt_data_writeback, "data=writeback"},
+ {Opt_data_alloc_on_commit, "data=alloc_on_commit"},
{Opt_data_err_abort, "data_err=abort"},
{Opt_data_err_ignore, "data_err=ignore"},
{Opt_offusrjquota, "usrjquota="},
@@ -1273,6 +1276,9 @@ static int parse_options(char *options, struct super_block *sb,
case Opt_data_ordered:
data_opt = EXT4_MOUNT_ORDERED_DATA;
goto datacheck;
+ case Opt_data_alloc_on_commit:
+ data_opt = EXT4_MOUNT_ALLOC_COMMIT_DATA;
+ goto datacheck;
case Opt_data_writeback:
data_opt = EXT4_MOUNT_WRITEBACK_DATA;
datacheck:
@@ -1852,6 +1858,26 @@ static void ext4_orphan_cleanup(struct super_block *sb,
#endif
sb->s_flags = s_flags; /* Restore MS_RDONLY status */
}
+
+/*
+ * This callback is called before each commit when we are using
+ * alloc-on-commit mode.
+ */
+static void alloc_on_commit_callback(journal_t *journal)
+{
+ struct jbd2_inode *jinode, *next_i;
+ transaction_t *transaction = journal->j_running_transaction;
+
+ spin_lock(&journal->j_list_lock);
+ list_for_each_entry_safe(jinode, next_i,
+ &transaction->t_inode_list, i_list) {
+ spin_unlock(&journal->j_list_lock);
+ ext4_alloc_da_blocks(jinode->i_vfs_inode);
+ spin_lock(&journal->j_list_lock);
+ }
+ spin_unlock(&journal->j_list_lock);
+}
+
/*
* Maximal extent format file size.
* Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -2283,6 +2309,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
+ else if ((def_mount_opts & EXT4_DEFM_JMODE) ==
+ EXT4_DEFM_JMODE_ALLOC_COMMIT)
+ sbi->s_mount_opt |= EXT4_MOUNT_ALLOC_COMMIT_DATA;
if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
set_opt(sbi->s_mount_opt, ERRORS_PANIC);
@@ -2654,18 +2683,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
/* We have now updated the journal if required, so we can
* validate the data journaling mode. */
switch (test_opt(sb, DATA_FLAGS)) {
- case 0:
- /* No mode set, assume a default based on the journal
- * capabilities: ORDERED_DATA if the journal can
- * cope, else JOURNAL_DATA
- */
- if (jbd2_journal_check_available_features
- (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
- set_opt(sbi->s_mount_opt, ORDERED_DATA);
- else
- set_opt(sbi->s_mount_opt, JOURNAL_DATA);
- break;
-
+ case EXT4_MOUNT_ALLOC_COMMIT_DATA:
+ sbi->s_journal->j_pre_commit_callback =
+ alloc_on_commit_callback;
case EXT4_MOUNT_ORDERED_DATA:
case EXT4_MOUNT_WRITEBACK_DATA:
if (!jbd2_journal_check_available_features
@@ -2784,6 +2804,9 @@ no_journal:
descr = " journalled data mode";
else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
descr = " ordered data mode";
+ else if (test_opt(sb, DATA_FLAGS) ==
+ EXT4_MOUNT_ALLOC_COMMIT_DATA)
+ descr = " alloc on commit data mode";
else
descr = " writeback data mode";
} else
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 62804e5..e8a96e7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -379,6 +379,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
spin_unlock(&journal->j_list_lock);
#endif
+ if (journal->j_pre_commit_callback)
+ journal->j_pre_commit_callback(journal);
+
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 4d248b3..43b1689 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -975,6 +975,8 @@ struct journal_s
u32 j_min_batch_time;
u32 j_max_batch_time;
+ /* This function is called before a transaction is closed */
+ void (*j_pre_commit_callback)(journal_t *);
/* This function is called when a transaction is closed */
void (*j_commit_callback)(journal_t *,
transaction_t *);
--
1.5.6.3
next reply other threads:[~2009-03-17 3:20 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-03-17 3:19 Theodore Ts'o [this message]
2009-03-17 4:07 ` [PATCH] ext4: Add support for data=alloc_on_commit mode Eric Sandeen
2009-03-17 9:28 ` Aneesh Kumar K.V
2009-03-18 13:12 ` Jan Kara
2009-03-18 18:19 ` Theodore Tso
2009-03-18 4:18 ` Eric Sandeen
2009-03-19 16:42 ` Eric Sandeen
2009-03-19 21:50 ` Eric Sandeen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1237259998-12656-1-git-send-email-tytso@mit.edu \
--to=tytso@mit.edu \
--cc=linux-ext4@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.