From: Michael Kerrisk <mtk.manpages@gmail.com>
To: Theodore Ts'o <tytso@mit.edu>
Cc: Linux Filesystem Development List <linux-fsdevel@vger.kernel.org>,
Al Viro <viro@zeniv.linux.org.uk>,
Linux API <linux-api@vger.kernel.org>
Subject: Re: [PATCH-v9 3/3] ext4: add optimization for the lazytime mount option
Date: Mon, 2 Feb 2015 07:03:59 +0100 [thread overview]
Message-ID: <CAHO5Pa1LVTLnu8Xecfyujpp7Py4dgP9+bZjYLn1F-HpgAbf7bw@mail.gmail.com> (raw)
In-Reply-To: <1422855422-7444-4-git-send-email-tytso@mit.edu>
[CC += linux-api@]
On Mon, Feb 2, 2015 at 6:37 AM, Theodore Ts'o <tytso@mit.edu> wrote:
> Add an optimization for the MS_LAZYTIME mount option so that we will
> opportunistically write out any inodes with the I_DIRTY_TIME flag set
> in a particular inode table block when we need to update some inode in
> that inode table block anyway.
>
> Also add some temporary code so that we can set the lazytime mount
> option without needing a modified /sbin/mount program which can set
> MS_LAZYTIME. We can eventually make this go away once util-linux has
> added support.
>
> Google-Bug-Id: 18297052
>
> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
> ---
> fs/ext4/inode.c | 64 +++++++++++++++++++++++++++++++++++++++++++--
> fs/ext4/super.c | 10 +++++++
> include/trace/events/ext4.h | 30 +++++++++++++++++++++
> 3 files changed, 102 insertions(+), 2 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 628df5b..9193ea1 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4139,6 +4139,65 @@ static int ext4_inode_blocks_set(handle_t *handle,
> return 0;
> }
>
> +struct other_inode {
> + unsigned long orig_ino;
> + struct ext4_inode *raw_inode;
> +};
> +
> +static int other_inode_match(struct inode * inode, unsigned long ino,
> + void *data)
> +{
> + struct other_inode *oi = (struct other_inode *) data;
> +
> + if ((inode->i_ino != ino) ||
> + (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
> + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) ||
> + ((inode->i_state & I_DIRTY_TIME) == 0))
> + return 0;
> + spin_lock(&inode->i_lock);
> + if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW |
> + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) &&
> + (inode->i_state & I_DIRTY_TIME)) {
> + struct ext4_inode_info *ei = EXT4_I(inode);
> +
> + inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED);
> + spin_unlock(&inode->i_lock);
> +
> + spin_lock(&ei->i_raw_lock);
> + EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode);
> + EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode);
> + EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode);
> + ext4_inode_csum_set(inode, oi->raw_inode, ei);
> + spin_unlock(&ei->i_raw_lock);
> + trace_ext4_other_inode_update_time(inode, oi->orig_ino);
> + return -1;
> + }
> + spin_unlock(&inode->i_lock);
> + return -1;
> +}
> +
> +/*
> + * Opportunistically update the other time fields for other inodes in
> + * the same inode table block.
> + */
> +static void ext4_update_other_inodes_time(struct super_block *sb,
> + unsigned long orig_ino, char *buf)
> +{
> + struct other_inode oi;
> + unsigned long ino;
> + int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
> + int inode_size = EXT4_INODE_SIZE(sb);
> +
> + oi.orig_ino = orig_ino;
> + ino = orig_ino & ~(inodes_per_block - 1);
> + for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) {
> + if (ino == orig_ino)
> + continue;
> + oi.raw_inode = (struct ext4_inode *) buf;
> + (void) find_inode_nowait(sb, ino, other_inode_match, &oi);
> + }
> +}
> +
> /*
> * Post the struct inode info into an on-disk inode location in the
> * buffer-cache. This gobbles the caller's reference to the
> @@ -4248,10 +4307,11 @@ static int ext4_do_update_inode(handle_t *handle,
> cpu_to_le16(ei->i_extra_isize);
> }
> }
> -
> ext4_inode_csum_set(inode, raw_inode, ei);
> -
> spin_unlock(&ei->i_raw_lock);
> + if (inode->i_sb->s_flags & MS_LAZYTIME)
> + ext4_update_other_inodes_time(inode->i_sb, inode->i_ino,
> + bh->b_data);
>
> BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
> rc = ext4_handle_dirty_metadata(handle, NULL, bh);
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 74c5f53..362b23c 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1139,6 +1139,7 @@ enum {
> Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
> Opt_usrquota, Opt_grpquota, Opt_i_version,
> Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
> + Opt_lazytime, Opt_nolazytime,
> Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
> Opt_inode_readahead_blks, Opt_journal_ioprio,
> Opt_dioread_nolock, Opt_dioread_lock,
> @@ -1202,6 +1203,8 @@ static const match_table_t tokens = {
> {Opt_i_version, "i_version"},
> {Opt_stripe, "stripe=%u"},
> {Opt_delalloc, "delalloc"},
> + {Opt_lazytime, "lazytime"},
> + {Opt_nolazytime, "nolazytime"},
> {Opt_nodelalloc, "nodelalloc"},
> {Opt_removed, "mblk_io_submit"},
> {Opt_removed, "nomblk_io_submit"},
> @@ -1459,6 +1462,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
> case Opt_i_version:
> sb->s_flags |= MS_I_VERSION;
> return 1;
> + case Opt_lazytime:
> + sb->s_flags |= MS_LAZYTIME;
> + return 1;
> + case Opt_nolazytime:
> + sb->s_flags &= ~MS_LAZYTIME;
> + return 1;
> }
>
> for (m = ext4_mount_opts; m->token != Opt_err; m++)
> @@ -5020,6 +5029,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
> }
> #endif
>
> + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME);
> ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data);
> kfree(orig_data);
> return 0;
> diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
> index 6cfb841..6e5abd6 100644
> --- a/include/trace/events/ext4.h
> +++ b/include/trace/events/ext4.h
> @@ -73,6 +73,36 @@ struct extent_status;
> { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
>
>
> +TRACE_EVENT(ext4_other_inode_update_time,
> + TP_PROTO(struct inode *inode, ino_t orig_ino),
> +
> + TP_ARGS(inode, orig_ino),
> +
> + TP_STRUCT__entry(
> + __field( dev_t, dev )
> + __field( ino_t, ino )
> + __field( ino_t, orig_ino )
> + __field( uid_t, uid )
> + __field( gid_t, gid )
> + __field( __u16, mode )
> + ),
> +
> + TP_fast_assign(
> + __entry->orig_ino = orig_ino;
> + __entry->dev = inode->i_sb->s_dev;
> + __entry->ino = inode->i_ino;
> + __entry->uid = i_uid_read(inode);
> + __entry->gid = i_gid_read(inode);
> + __entry->mode = inode->i_mode;
> + ),
> +
> + TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u",
> + MAJOR(__entry->dev), MINOR(__entry->dev),
> + (unsigned long) __entry->orig_ino,
> + (unsigned long) __entry->ino, __entry->mode,
> + __entry->uid, __entry->gid)
> +);
> +
> TRACE_EVENT(ext4_free_inode,
> TP_PROTO(struct inode *inode),
>
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/
Author of "The Linux Programming Interface", http://blog.man7.org/
next prev parent reply other threads:[~2015-02-02 6:03 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <1422855422-7444-1-git-send-email-tytso@mit.edu>
2015-02-02 6:03 ` [PATCH-v9 0/3] add support for lazytime mount option Michael Kerrisk
[not found] ` <CAHO5Pa0ySnLb_UGUw3deVyZEr8gdzzdeyMP5rXcT1MLOeccLGg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-02-02 14:48 ` Theodore Ts'o
[not found] ` <20150202144833.GB2509-AKGzg7BKzIDYtjvyW6yDsg@public.gmane.org>
2015-02-02 15:40 ` Michael Kerrisk (man-pages)
[not found] ` <1422855422-7444-2-git-send-email-tytso@mit.edu>
[not found] ` <1422855422-7444-2-git-send-email-tytso-3s7WtUTddSA@public.gmane.org>
2015-02-02 6:03 ` [PATCH-v9 1/3] vfs: add support for a " Michael Kerrisk
[not found] ` <1422855422-7444-4-git-send-email-tytso@mit.edu>
2015-02-02 6:03 ` Michael Kerrisk [this message]
[not found] ` <1422855422-7444-3-git-send-email-tytso@mit.edu>
[not found] ` <1422855422-7444-3-git-send-email-tytso-3s7WtUTddSA@public.gmane.org>
2015-02-02 6:04 ` [PATCH-v9 2/3] vfs: add find_inode_nowait() function Michael Kerrisk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAHO5Pa1LVTLnu8Xecfyujpp7Py4dgP9+bZjYLn1F-HpgAbf7bw@mail.gmail.com \
--to=mtk.manpages@gmail.com \
--cc=linux-api@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=tytso@mit.edu \
--cc=viro@zeniv.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for the URLs of the NNTP newsgroup(s).