From: npiggin@kernel.dk
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Subject: [patch 6/7] fs: fsync optimisations
Date: Wed, 24 Nov 2010 01:06:16 +1100 [thread overview]
Message-ID: <20101123140708.058372884@kernel.dk> (raw)
In-Reply-To: 20101123140610.292941494@kernel.dk
[-- Attachment #1: fs-datasync-opt.patch --]
[-- Type: text/plain, Size: 6862 bytes --]
Optimise fsync by adding a datasync parameter to sync_inode_metadata to
DTRT with writing back the inode (->write_inode in theory should have a
datasync parameter too perhaps, but that's for another time).
Also, implement the metadata sync optimally rather than reusing the
normal data writeback path. This means less useless moving the inode around the
writeback lists, and less dropping and retaking of inode_lock, and avoiding
the data writeback call with nr_pages == 0.
Signed-off-by: Nick Piggin <npiggin@kernel.dk>
Index: linux-2.6/drivers/staging/pohmelfs/inode.c
===================================================================
--- linux-2.6.orig/drivers/staging/pohmelfs/inode.c 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/drivers/staging/pohmelfs/inode.c 2010-11-23 22:57:45.000000000 +1100
@@ -883,7 +883,7 @@ static int pohmelfs_fsync(struct file *f
{
struct inode *inode = file->f_mapping->host;
- return sync_inode_metadata(inode, 1);
+ return sync_inode_metadata(inode, datasync, 1);
}
ssize_t pohmelfs_write(struct file *file, const char __user *buf,
Index: linux-2.6/fs/exofs/file.c
===================================================================
--- linux-2.6.orig/fs/exofs/file.c 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/fs/exofs/file.c 2010-11-23 22:57:45.000000000 +1100
@@ -48,7 +48,7 @@ static int exofs_file_fsync(struct file
struct inode *inode = filp->f_mapping->host;
struct super_block *sb;
- ret = sync_inode_metadata(inode, 1);
+ ret = sync_inode_metadata(inode, datasync, 1);
/* This is a good place to write the sb */
/* TODO: Sechedule an sb-sync on create */
Index: linux-2.6/fs/ext2/dir.c
===================================================================
--- linux-2.6.orig/fs/ext2/dir.c 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/fs/ext2/dir.c 2010-11-23 22:57:45.000000000 +1100
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page
if (IS_DIRSYNC(dir)) {
err = write_one_page(page, 1);
if (!err)
- err = sync_inode_metadata(dir, 1);
+ err = sync_inode_metadata(dir, 0, 1);
} else {
unlock_page(page);
}
Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c 2010-11-23 22:57:43.000000000 +1100
+++ linux-2.6/fs/ext2/inode.c 2010-11-23 22:57:45.000000000 +1100
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *in
inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
- sync_inode_metadata(inode, 1);
+ sync_inode_metadata(inode, 0, 1);
} else {
mark_inode_dirty(inode);
}
Index: linux-2.6/fs/ext2/xattr.c
===================================================================
--- linux-2.6.orig/fs/ext2/xattr.c 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/fs/ext2/xattr.c 2010-11-23 22:57:45.000000000 +1100
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, str
EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
inode->i_ctime = CURRENT_TIME_SEC;
if (IS_SYNC(inode)) {
- error = sync_inode_metadata(inode, 1);
+ error = sync_inode_metadata(inode, 0, 1);
/* In case sync failed due to ENOSPC the inode was actually
* written (only some dirty data were not) so we just proceed
* as if nothing happened and cleanup the unused block */
Index: linux-2.6/fs/fs-writeback.c
===================================================================
--- linux-2.6.orig/fs/fs-writeback.c 2010-11-23 22:57:40.000000000 +1100
+++ linux-2.6/fs/fs-writeback.c 2010-11-23 22:59:41.000000000 +1100
@@ -1307,7 +1307,7 @@ int sync_inode(struct inode *inode, stru
EXPORT_SYMBOL(sync_inode);
/**
- * sync_inode - write an inode to disk
+ * sync_inode_metadata - write an inode to disk
* @inode: the inode to sync
* @wait: wait for I/O to complete.
*
@@ -1315,13 +1315,49 @@ EXPORT_SYMBOL(sync_inode);
*
* Note: only writes the actual inode, no associated data or other metadata.
*/
-int sync_inode_metadata(struct inode *inode, int wait)
+int sync_inode_metadata(struct inode *inode, int datasync, int wait)
{
+ struct address_space *mapping = inode->i_mapping;
struct writeback_control wbc = {
.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
.nr_to_write = 0, /* metadata-only */
};
+ unsigned dirty, mask;
+ int ret = 0;
- return sync_inode(inode, &wbc);
+ /*
+ * This is a similar implementation to writeback_single_inode.
+ * Keep them in sync.
+ */
+ spin_lock(&inode_lock);
+ if (!inode_writeback_begin(inode, wait))
+ goto out;
+
+ if (datasync)
+ mask = I_DIRTY_DATASYNC;
+ else
+ mask = I_DIRTY_SYNC | I_DIRTY_DATASYNC;
+ dirty = inode->i_state & mask;
+ if (!dirty)
+ goto out;
+ /*
+ * Generic write_inode doesn't distinguish between sync and datasync,
+ * so even a datasync can clear the sync state. Filesystems which
+ * distiguish these cases must only clear 'mask' in their metadata
+ * sync code.
+ */
+ inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+
+ spin_unlock(&inode_lock);
+ ret = write_inode(inode, &wbc);
+ spin_lock(&inode_lock);
+ if (ret)
+ inode->i_state |= dirty; /* couldn't write out inode */
+
+ inode_writeback_end(inode);
+
+out:
+ spin_unlock(&inode_lock);
+ return ret;
}
EXPORT_SYMBOL(sync_inode_metadata);
Index: linux-2.6/fs/libfs.c
===================================================================
--- linux-2.6.orig/fs/libfs.c 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/fs/libfs.c 2010-11-23 22:57:45.000000000 +1100
@@ -895,7 +895,7 @@ int generic_file_fsync(struct file *file
int ret;
ret = sync_mapping_buffers(inode->i_mapping);
- err = sync_inode_metadata(inode, 1);
+ err = sync_inode_metadata(inode, datasync, 1);
if (ret == 0)
ret = err;
return ret;
Index: linux-2.6/fs/nfsd/vfs.c
===================================================================
--- linux-2.6.orig/fs/nfsd/vfs.c 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/fs/nfsd/vfs.c 2010-11-23 22:57:45.000000000 +1100
@@ -287,7 +287,7 @@ commit_metadata(struct svc_fh *fhp)
if (export_ops->commit_metadata)
return export_ops->commit_metadata(inode);
- return sync_inode_metadata(inode, 1);
+ return sync_inode_metadata(inode, 0, 1);
}
/*
Index: linux-2.6/include/linux/fs.h
===================================================================
--- linux-2.6.orig/include/linux/fs.h 2010-11-23 22:56:51.000000000 +1100
+++ linux-2.6/include/linux/fs.h 2010-11-23 22:57:45.000000000 +1100
@@ -1764,7 +1764,7 @@ static inline void file_accessed(struct
}
int sync_inode(struct inode *inode, struct writeback_control *wbc);
-int sync_inode_metadata(struct inode *inode, int wait);
+int sync_inode_metadata(struct inode *inode, int datasync, int wait);
int inode_writeback_begin(struct inode *inode, int wait);
int inode_writeback_end(struct inode *inode);
next prev parent reply other threads:[~2010-11-23 14:06 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-23 14:06 [patch 0/7] icache dirty / sync fixes npiggin
2010-11-23 14:06 ` [patch 1/7] fs: mark_inode_dirty barrier fix npiggin
2010-11-23 14:06 ` [patch 2/7] fs: simple fsync race fix npiggin
2010-11-29 14:58 ` Christoph Hellwig
2010-11-30 0:05 ` Nick Piggin
2010-11-23 14:06 ` [patch 3/7] fs: introduce inode writeback helpers npiggin
2010-11-29 15:13 ` Christoph Hellwig
2010-11-30 0:22 ` Nick Piggin
2010-11-23 14:06 ` [patch 4/7] fs: preserve inode dirty bits on failed metadata writeback npiggin
2010-11-29 14:59 ` Christoph Hellwig
2010-11-30 0:08 ` Nick Piggin
2010-11-23 14:06 ` [patch 5/7] fs: ext2 inode sync fix npiggin
2010-11-30 11:26 ` Boaz Harrosh
2010-11-23 14:06 ` npiggin [this message]
2010-11-29 15:03 ` [patch 6/7] fs: fsync optimisations Christoph Hellwig
2010-11-30 0:11 ` Nick Piggin
2010-11-23 14:06 ` [patch 7/7] fs: fix or note I_DIRTY handling bugs in filesystems npiggin
2010-11-23 15:04 ` Steven Whitehouse
2010-11-23 22:51 ` Dave Chinner
2010-11-24 0:23 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20101123140708.058372884@kernel.dk \
--to=npiggin@kernel.dk \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).