From: Suparna Bhattacharya <suparna@in.ibm.com>
To: akpm@osdl.org
Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org
Subject: [PATCH] Fix O_SYNC speedup for generic_file_write_nolock
Date: Mon, 8 Nov 2004 15:37:38 +0530 [thread overview]
Message-ID: <20041108100738.GA4003@in.ibm.com> (raw)
The O_SYNC speedup patches missed the generic_file_xxx_nolock cases,
which means that pages weren't actually getting sync'ed in those
cases. This patch fixes that.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
include/linux/writeback.h | 2 +
mm/filemap.c | 67 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 66 insertions(+), 3 deletions(-)
diff -urp -X dontdiff2 linux-2.6.10-rc1/include/linux/writeback.h linux-2.6.10-rc1-aio/include/linux/writeback.h
--- linux-2.6.10-rc1/include/linux/writeback.h 2004-11-03 12:04:10.000000000 +0530
+++ linux-2.6.10-rc1-aio/include/linux/writeback.h 2004-11-04 10:10:31.000000000 +0530
@@ -106,6 +106,8 @@ int pdflush_operation(void (*fn)(unsigne
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
int sync_page_range(struct inode *inode, struct address_space *mapping,
loff_t pos, size_t count);
+int sync_page_range_nolock(struct inode *inode, struct address_space
+ *mapping, loff_t pos, size_t count);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
diff -urp -X dontdiff2 linux-2.6.10-rc1/mm/filemap.c linux-2.6.10-rc1-aio/mm/filemap.c
--- linux-2.6.10-rc1/mm/filemap.c 2004-11-03 12:04:24.000000000 +0530
+++ linux-2.6.10-rc1-aio/mm/filemap.c 2004-11-04 10:10:31.000000000 +0530
@@ -283,6 +283,30 @@ int sync_page_range(struct inode *inode,
}
EXPORT_SYMBOL(sync_page_range);
+/*
+ * Note: Holding i_sem across sync_page_range_nolock is not a good idea
+ * as it forces O_SYNC writers to different parts of the same file
+ * to be serialised right until io completion.
+ */
+int sync_page_range_nolock(struct inode *inode, struct address_space *mapping,
+ loff_t pos, size_t count)
+{ /* Syncs bytes pos..pos+count-1 of mapping; takes no i_sem itself. Returns 0 or the first non-zero step result. */
+ pgoff_t start = pos >> PAGE_CACHE_SHIFT; /* index of the page holding the first byte */
+ pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT; /* index of the page holding the last byte */
+ int ret;
+
+ if (mapping->backing_dev_info->memory_backed || !count)
+ return 0; /* nothing to sync: memory-backed mapping or empty range */
+ ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1); /* end offset is inclusive */
+ if (ret == 0) {
+ ret = generic_osync_inode(inode, mapping, OSYNC_METADATA); /* skipped if the range write failed */
+ }
+ if (ret == 0)
+ ret = wait_on_page_writeback_range(mapping, start, end); /* runs only if both earlier steps returned 0 */
+ return ret;
+}
+EXPORT_SYMBOL(sync_page_range_nolock);
+
/**
* filemap_fdatawait - walk the list of under-writeback pages of the given
* address space and wait for all of them.
@@ -1998,7 +2022,7 @@ generic_file_buffered_write(struct kiocb
EXPORT_SYMBOL(generic_file_buffered_write);
ssize_t
-generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
+__generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
struct file *file = iocb->ki_filp;
@@ -2075,6 +2099,43 @@ out:
EXPORT_SYMBOL(generic_file_aio_write_nolock);
ssize_t
+generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
+{ /* Wrapper: performs the write via __generic_file_aio_write_nolock, then syncs the written range for O_SYNC/IS_SYNC. */
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
+ ssize_t ret;
+ loff_t pos = *ppos; /* snapshot the start offset before ppos is handed to the write helper */
+
+ ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos);
+
+ if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { /* sync only if bytes were written */
+ int err;
+
+ err = sync_page_range_nolock(inode, mapping, pos, ret); /* ret bytes starting at the saved offset */
+ if (err < 0)
+ ret = err; /* a sync failure replaces the byte count as the return value */
+ }
+ return ret;
+}
+
+
+ssize_t
+__generic_file_write_nolock(struct file *file, const struct iovec *iov,
+ unsigned long nr_segs, loff_t *ppos)
+{ /* Write through an on-stack kiocb; performs no O_SYNC range sync itself. */
+ struct kiocb kiocb;
+ ssize_t ret;
+
+ init_sync_kiocb(&kiocb, file);
+ ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); /* the __ variant, not the syncing wrapper */
+ if (-EIOCBQUEUED == ret)
+ ret = wait_on_sync_kiocb(&kiocb); /* request was queued: take the result from the wait instead */
+ return ret;
+}
+
+ssize_t
generic_file_write_nolock(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *ppos)
{
@@ -2128,7 +2189,7 @@ ssize_t generic_file_write(struct file *
.iov_len = count };
down(&inode->i_sem);
- ret = generic_file_write_nolock(file, &local_iov, 1, ppos);
+ ret = __generic_file_write_nolock(file, &local_iov, 1, ppos);
up(&inode->i_sem);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
@@ -2165,7 +2226,7 @@ ssize_t generic_file_writev(struct file
ssize_t ret;
down(&inode->i_sem);
- ret = generic_file_write_nolock(file, iov, nr_segs, ppos);
+ ret = __generic_file_write_nolock(file, iov, nr_segs, ppos);
up(&inode->i_sem);
if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
next reply other threads:[~2004-11-08 9:58 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-11-08 10:07 Suparna Bhattacharya [this message]
2004-11-08 10:04 ` [PATCH] Fix O_SYNC speedup for generic_file_write_nolock Arjan van de Ven
2004-11-08 11:53 ` Suparna Bhattacharya
2004-11-08 12:05 ` Arjan van de Ven
2004-11-08 12:32 ` Suparna Bhattacharya
2004-11-08 15:20 ` Joel Becker
2004-11-08 15:31 ` Arjan van de Ven
2004-11-08 18:29 ` Joel Becker
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20041108100738.GA4003@in.ibm.com \
--to=suparna@in.ibm.com \
--cc=akpm@osdl.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.