From: Theodore Tso <tytso@mit.edu>
To: Ric Wheeler <rwheeler@redhat.com>
Cc: Andreas Dilger <adilger@sun.com>, Josef Bacik <jbacik@redhat.com>,
linux-ext4@vger.kernel.org
Subject: [PATCH] ext4: add fsync batch tuning knobs
Date: Tue, 25 Nov 2008 05:22:32 -0500 [thread overview]
Message-ID: <20081125102232.GA9882@mit.edu> (raw)
In-Reply-To: <4912E6F2.6010705@redhat.com>
On Thu, Nov 06, 2008 at 07:45:38AM -0500, Ric Wheeler wrote:
> I think that it could be useful to have a tunable knob for the max sleep
> time. It would certainly be capped at a much lower level for a RAM disk
> than it would be for a very large RAID volume...
Here is a patch which implements tuning knobs for Josef's fsync
batching patch, at least for ext4. I've implemented both a minimum
and maximum batch time. I've also changed the default max batch time
to be 15ms, independent of HZ setting.
It'd be great if someone could run some benchmarks to see what would
be ideal ways to twiddle these knobs for different types of storage
devices (and workloads, presumably).
- Ted
-------------
ext4: add fsync batch tuning knobs
From: "Theodore Ts'o" <tytso@mit.edu>
Add new mount options, min_batch_time and max_batch_time, which
controls how long the jbd2 layer should wait for additional filesystem
operations to get batched into a synchronous handle.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8370ffd..6244c5d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -328,6 +328,7 @@ struct ext4_mount_options {
uid_t s_resuid;
gid_t s_resgid;
unsigned long s_commit_interval;
+ u32 s_min_batch_time, s_max_batch_time;
#ifdef CONFIG_QUOTA
int s_jquota_fmt;
char *s_qf_names[MAXQUOTAS];
@@ -806,6 +807,12 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
#define EXT4_DEFM_JMODE_WBACK 0x0060
/*
+ * Default journal batch times
+ */
+#define EXT4_DEF_MIN_BATCH_TIME 0
+#define EXT4_DEF_MAX_BATCH_TIME 15000 /* 15ms */
+
+/*
* Structure of a directory entry
*/
#define EXT4_NAME_LEN 255
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 2f3b8b1..f5b5d56 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -74,6 +74,8 @@ struct ext4_sb_info {
struct journal_s *s_journal;
struct list_head s_orphan;
unsigned long s_commit_interval;
+ u32 s_max_batch_time;
+ u32 s_min_batch_time;
struct block_device *journal_bdev;
#ifdef CONFIG_JBD2_DEBUG
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4aa2040..35500ed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -681,10 +681,19 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
#endif
if (!test_opt(sb, RESERVATION))
seq_puts(seq, ",noreservation");
- if (sbi->s_commit_interval) {
+ if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
seq_printf(seq, ",commit=%u",
(unsigned) (sbi->s_commit_interval / HZ));
}
+ if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
+ seq_printf(seq, ",min_batch_time=%u",
+ (unsigned) sbi->s_min_batch_time);
+ }
+ if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
+ seq_printf(seq, ",max_batch_time=%u",
+ (unsigned) sbi->s_min_batch_time);
+ }
+
/*
* We're changing the default of barrier mount option, so
* let's always display its mount state so it's clear what its
@@ -850,7 +859,8 @@ enum {
Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
- Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+ Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
+ Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
Opt_journal_checksum, Opt_journal_async_commit,
Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
Opt_data_err_abort, Opt_data_err_ignore,
@@ -889,6 +899,8 @@ static const match_table_t tokens = {
{Opt_nobh, "nobh"},
{Opt_bh, "bh"},
{Opt_commit, "commit=%u"},
+ {Opt_min_batch_time, "min_batch_time=%u"},
+ {Opt_max_batch_time, "max_batch_time=%u"},
{Opt_journal_update, "journal=update"},
{Opt_journal_inum, "journal=%u"},
{Opt_journal_dev, "journal_dev=%u"},
@@ -1107,6 +1119,20 @@ static int parse_options(char *options, struct super_block *sb,
option = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * option;
break;
+ case Opt_max_batch_time:
+ if (match_int(&args[0], &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ if (option == 0)
+ option = EXT4_DEF_MAX_BATCH_TIME;
+ sbi->s_max_batch_time = option;
+ case Opt_min_batch_time:
+ if (match_int(&args[0], &option))
+ return 0;
+ if (option < 0)
+ return 0;
+ sbi->s_min_batch_time = option;
case Opt_data_journal:
data_opt = EXT4_MOUNT_JOURNAL_DATA;
goto datacheck;
@@ -1955,6 +1981,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+ sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
+ sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
+ sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
set_opt(sbi->s_mount_opt, RESERVATION);
set_opt(sbi->s_mount_opt, BARRIER);
@@ -2484,11 +2513,9 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- if (sbi->s_commit_interval)
- journal->j_commit_interval = sbi->s_commit_interval;
- /* We could also set up an ext4-specific default for the commit
- * interval here, but for now we'll just fall back to the jbd
- * default. */
+ journal->j_commit_interval = sbi->s_commit_interval;
+ journal->j_min_batch_time = sbi->s_min_batch_time;
+ journal->j_max_batch_time = sbi->s_max_batch_time;
spin_lock(&journal->j_state_lock);
if (test_opt(sb, BARRIER))
@@ -2977,6 +3004,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
old_opts.s_resuid = sbi->s_resuid;
old_opts.s_resgid = sbi->s_resgid;
old_opts.s_commit_interval = sbi->s_commit_interval;
+ old_opts.s_min_batch_time = sbi->s_min_batch_time;
+ old_opts.s_max_batch_time = sbi->s_max_batch_time;
#ifdef CONFIG_QUOTA
old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++)
@@ -3106,6 +3135,8 @@ restore_opts:
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sbi->s_commit_interval = old_opts.s_commit_interval;
+ sbi->s_min_batch_time = old_opts.s_min_batch_time;
+ sbi->s_max_batch_time = old_opts.s_max_batch_time;
#ifdef CONFIG_QUOTA
sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
for (i = 0; i < MAXQUOTAS; i++) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e70d657..dfacd9e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -961,6 +961,8 @@ static journal_t * journal_init_common (void)
spin_lock_init(&journal->j_state_lock);
journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
+ journal->j_min_batch_time = 0;
+ journal->j_max_batch_time = 15000; /* 15ms */
/* The journal is marked for error until we succeed with recovery! */
journal->j_flags = JBD2_ABORT;
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 0e1fc34..e7de7ac 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1255,9 +1255,11 @@ int jbd2_journal_stop(handle_t *handle)
trans_time = ktime_to_ns(ktime_sub(ktime_get(),
transaction->t_start_time));
+ commit_time = max_t(u64, commit_time,
+ 1000*journal->j_min_batch_time);
commit_time = min_t(u64, commit_time,
- 1000*jiffies_to_usecs(1));
next prev parent reply other threads:[~2008-11-25 10:22 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-11-04 16:10 [PATCH] imporve jbd2 fsync batching Josef Bacik
2008-11-04 20:52 ` Theodore Tso
2008-11-04 22:15 ` Leroy van Logchem
2008-11-05 23:10 ` Andreas Dilger
2008-11-06 0:27 ` Theodore Tso
2008-11-06 12:45 ` Ric Wheeler
2008-11-25 10:22 ` Theodore Tso [this message]
2008-12-02 14:45 ` [PATCH] ext4: add fsync batch tuning knobs Aneesh Kumar K.V
2008-11-06 14:35 ` [PATCH] imporve jbd2 fsync batching Josef Bacik
2008-11-25 10:23 ` Theodore Tso
2008-11-25 22:41 ` Andreas Dilger
2008-11-26 5:10 ` Theodore Tso
2008-11-26 13:18 ` Josef Bacik
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20081125102232.GA9882@mit.edu \
--to=tytso@mit.edu \
--cc=adilger@sun.com \
--cc=jbacik@redhat.com \
--cc=linux-ext4@vger.kernel.org \
--cc=rwheeler@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).