From: Suparna Bhattacharya <suparna@in.ibm.com>
To: linux-aio@kvack.org, akpm@osdl.org, drepper@redhat.com
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
jakub@redhat.com, mingo@elte.hu
Subject: [FSAIO][PATCH 8/8] AIO O_SYNC filesystem write
Date: Thu, 28 Dec 2006 14:14:08 +0530 [thread overview]
Message-ID: <20061228084408.GH6971@in.ibm.com> (raw)
In-Reply-To: <20061228082308.GA4476@in.ibm.com>
AIO support for O_SYNC buffered writes, built over O_SYNC-speedup.
It uses the tagged radix tree lookups to writeout just the pages
pertaining to this request, and retries instead of blocking
for writeback to complete on the same range. All the writeout is
issued at the time of io submission, and there is a check to make
sure that retries skip over straight to the wait_on_page_writeback_range.
Limitations: Extending file writes or hole overwrites with O_SYNC may
still block because we have yet to convert generic_osync_inode to be
asynchronous. For non-O_SYNC writes, writeout happens in the background
and so typically appears async to the caller, except for memory throttling
and non-block-aligned writes involving read-modify-write.
Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
include/linux/aio.h | 0
linux-2.6.20-rc1-root/include/linux/fs.h | 13 +++++-
linux-2.6.20-rc1-root/mm/filemap.c | 61 +++++++++++++++++++++----------
3 files changed, 54 insertions(+), 20 deletions(-)
diff -puN include/linux/aio.h~aio-fs-write include/linux/aio.h
diff -puN mm/filemap.c~aio-fs-write mm/filemap.c
--- linux-2.6.20-rc1/mm/filemap.c~aio-fs-write 2006-12-21 08:46:21.000000000 +0530
+++ linux-2.6.20-rc1-root/mm/filemap.c 2006-12-21 08:46:21.000000000 +0530
@@ -239,10 +239,11 @@ EXPORT_SYMBOL(filemap_flush);
* @end: ending page index
*
* Wait for writeback to complete against pages indexed by start->end
- * inclusive
+ * inclusive. In AIO context, this may queue an async notification
+ * and retry callback and return, instead of blocking the caller.
*/
-int wait_on_page_writeback_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end)
+int __wait_on_page_writeback_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end, wait_queue_t *wait)
{
struct pagevec pvec;
int nr_pages;
@@ -254,20 +255,20 @@ int wait_on_page_writeback_range(struct
pagevec_init(&pvec, 0);
index = start;
- while ((index <= end) &&
+ while (!ret && (index <= end) &&
(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
PAGECACHE_TAG_WRITEBACK,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
unsigned i;
- for (i = 0; i < nr_pages; i++) {
+ for (i = 0; !ret && (i < nr_pages); i++) {
struct page *page = pvec.pages[i];
/* until radix tree lookup accepts end_index */
if (page->index > end)
continue;
- wait_on_page_writeback(page);
+ ret = __wait_on_page_writeback(page, wait);
if (PageError(page))
ret = -EIO;
}
@@ -303,18 +304,27 @@ int sync_page_range(struct inode *inode,
{
pgoff_t start = pos >> PAGE_CACHE_SHIFT;
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
- int ret;
+ int ret = 0;
if (!mapping_cap_writeback_dirty(mapping) || !count)
return 0;
+ if (in_aio()) {
+ /* Already issued writeouts for this iocb ? */
+ if (kiocbTryRestart(io_wait_to_kiocb(current->io_wait)))
+ goto do_wait; /* just need to check if done */
+ }
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
- if (ret == 0) {
+
+ if (ret >= 0) {
mutex_lock(&inode->i_mutex);
ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
mutex_unlock(&inode->i_mutex);
}
- if (ret == 0)
- ret = wait_on_page_writeback_range(mapping, start, end);
+do_wait:
+ if (ret >= 0) {
+ ret = __wait_on_page_writeback_range(mapping, start, end,
+ current->io_wait);
+ }
return ret;
}
EXPORT_SYMBOL(sync_page_range);
@@ -335,15 +345,23 @@ int sync_page_range_nolock(struct inode
{
pgoff_t start = pos >> PAGE_CACHE_SHIFT;
pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
- int ret;
+ int ret = 0;
if (!mapping_cap_writeback_dirty(mapping) || !count)
return 0;
+ if (in_aio()) {
+ /* Already issued writeouts for this iocb ? */
+ if (kiocbTryRestart(io_wait_to_kiocb(current->io_wait)))
+ goto do_wait; /* just need to check if done */
+ }
ret = filemap_fdatawrite_range(mapping, pos, pos + count - 1);
- if (ret == 0)
+ if (ret >= 0)
ret = generic_osync_inode(inode, mapping, OSYNC_METADATA);
- if (ret == 0)
- ret = wait_on_page_writeback_range(mapping, start, end);
+do_wait:
+ if (ret >= 0) {
+ ret = __wait_on_page_writeback_range(mapping, start, end,
+ current->io_wait);
+ }
return ret;
}
EXPORT_SYMBOL(sync_page_range_nolock);
@@ -2216,7 +2234,7 @@ zero_length_segment:
*/
if (likely(status >= 0)) {
if (unlikely((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
- if (!a_ops->writepage || !is_sync_kiocb(iocb))
+ if (!a_ops->writepage)
status = generic_osync_inode(inode, mapping,
OSYNC_METADATA|OSYNC_DATA);
}
@@ -2268,7 +2286,10 @@ __generic_file_aio_write_nolock(struct k
ocount -= iv->iov_len; /* This segment is no good */
break;
}
-
+ if (!is_sync_kiocb(iocb) && kiocbIsRestarted(iocb)) {
+ /* nothing to transfer, may just need to sync data */
+ return ocount;
+ }
count = ocount;
pos = *ppos;
@@ -2368,8 +2389,10 @@ ssize_t generic_file_aio_write_nolock(st
ssize_t err;
err = sync_page_range_nolock(inode, mapping, pos, ret);
- if (err < 0)
+ if (err < 0) {
ret = err;
+ iocb->ki_pos = pos;
+ }
}
return ret;
}
@@ -2394,8 +2417,10 @@ ssize_t generic_file_aio_write(struct ki
ssize_t err;
err = sync_page_range(inode, mapping, pos, ret);
- if (err < 0)
+ if (err < 0) {
ret = err;
+ iocb->ki_pos = pos;
+ }
}
return ret;
}
diff -puN include/linux/fs.h~aio-fs-write include/linux/fs.h
--- linux-2.6.20-rc1/include/linux/fs.h~aio-fs-write 2006-12-21 08:46:21.000000000 +0530
+++ linux-2.6.20-rc1-root/include/linux/fs.h 2006-12-21 08:46:21.000000000 +0530
@@ -279,6 +279,7 @@ extern int dir_notify_enable;
#include <linux/prio_tree.h>
#include <linux/init.h>
#include <linux/pid.h>
+#include <linux/sched.h>
#include <linux/mutex.h>
#include <asm/atomic.h>
@@ -1588,8 +1589,16 @@ extern int filemap_fdatawait(struct addr
extern int filemap_write_and_wait(struct address_space *mapping);
extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
-extern int wait_on_page_writeback_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
+extern int __wait_on_page_writeback_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end, wait_queue_t *wait);
+
+static inline int wait_on_page_writeback_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end)
+{
+ return __wait_on_page_writeback_range(mapping, start, end,
+ &current->__wait.wait);
+}
+
extern int __filemap_fdatawrite_range(struct address_space *mapping,
loff_t start, loff_t end, int sync_mode);
_
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org. For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>
next prev parent reply other threads:[~2006-12-28 8:44 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-12-27 15:38 [RFC] Heads up on a series of AIO patchsets Suparna Bhattacharya
2006-12-27 16:25 ` Christoph Hellwig
2006-12-27 16:55 ` Ingo Molnar
2006-12-27 17:18 ` Ingo Molnar
2006-12-28 11:41 ` Evgeniy Polyakov
2007-01-02 21:38 ` Dan Williams
2007-01-03 13:35 ` Evgeniy Polyakov
2006-12-28 8:23 ` [PATCHSET 1][PATCH 0/6] Filesystem AIO read/write Suparna Bhattacharya
2006-12-28 8:34 ` [FSAIO][PATCH 1/6] Add a wait queue parameter to the wait_bit action routine Suparna Bhattacharya
2006-12-28 8:46 ` Suparna Bhattacharya
2006-12-28 8:36 ` [FSAIO][PATCH 2/8] Rename __lock_page to lock_page_slow Suparna Bhattacharya
2006-12-28 8:39 ` [FSAIO][PATCH 3/8] Routines to initialize and test a wait bit key Suparna Bhattacharya
2006-12-28 22:42 ` Andrew Morton
2006-12-28 8:39 ` [FSAIO][PATCH 4/8] Add a default io wait bit field in task struct Suparna Bhattacharya
2006-12-28 8:40 ` [FSAIO][PATCH 5/8] Enable wait bit based filtered wakeups to work for AIO Suparna Bhattacharya
2006-12-28 8:41 ` [FSAIO][PATCH 6/8] Enable asynchronous wait page and lock page Suparna Bhattacharya
2006-12-28 11:55 ` Christoph Hellwig
2006-12-28 14:47 ` Suparna Bhattacharya
2007-01-02 14:26 ` Christoph Hellwig
2007-01-04 6:50 ` Nick Piggin
2006-12-28 8:42 ` [FSAIO][PATCH 7/8] Filesystem AIO read Suparna Bhattacharya
2006-12-28 11:57 ` Christoph Hellwig
2006-12-28 14:15 ` Christoph Hellwig
2006-12-28 15:18 ` Suparna Bhattacharya
2007-01-02 14:29 ` Christoph Hellwig
2006-12-28 16:22 ` Jan Engelhardt
2006-12-28 16:56 ` Randy Dunlap
2006-12-28 8:44 ` Suparna Bhattacharya [this message]
2006-12-28 9:52 ` [PATCHSET 1][PATCH 0/6] Filesystem AIO read/write Ingo Molnar
2006-12-28 22:53 ` Andrew Morton
2007-01-03 22:15 ` Andrew Morton
2007-01-04 4:56 ` Suparna Bhattacharya
2007-01-04 5:51 ` Nick Piggin
2007-01-04 6:26 ` Suparna Bhattacharya
2007-01-04 6:50 ` Nick Piggin
2007-01-04 11:24 ` Suparna Bhattacharya
2007-01-05 4:56 ` Nick Piggin
2007-01-04 17:02 ` Andrew Morton
2007-01-04 17:49 ` Jens Axboe
2007-01-05 6:28 ` Suparna Bhattacharya
2007-01-05 7:02 ` Jens Axboe
2007-01-05 8:08 ` Suparna Bhattacharya
2007-01-05 8:32 ` Jens Axboe
2007-01-10 5:44 ` Suparna Bhattacharya
2007-01-11 1:08 ` Andrew Morton
2007-01-11 3:13 ` Suparna Bhattacharya
2007-01-11 4:52 ` Andrew Morton
2007-01-02 23:56 ` [RFC] Heads up on a series of AIO patchsets Zach Brown
[not found] ` <6f703f960701021640y444bc537w549fd6d74f3e9529@mail.gmail.com>
[not found] ` <A85B8249-FC4E-4612-8B28-02BC680DC812@oracle.com>
2007-01-03 1:18 ` Kent Overstreet
2007-01-04 20:33 ` Pavel Machek
2007-01-03 5:03 ` Suparna Bhattacharya
2007-01-05 0:36 ` Zach Brown
2007-01-03 7:23 ` [PATCHSET 2][PATCH 1/1] Combining epoll and disk file AIO Suparna Bhattacharya
2007-01-04 9:27 ` [PATCHSET 3][PATCH 0/5][AIO] - AIO completion signal notification v4 Bharata B Rao
2007-01-04 9:30 ` [PATCHSET 3][PATCH 1/5][AIO] - Rework compat_sys_io_submit Bharata B Rao
2007-01-04 9:32 ` [PATCHSET 3][PATCH 2/5][AIO] - fix aio.h includes Bharata B Rao
2007-01-04 9:34 ` [PATCHSET 3][PATCH 3/5][AIO] - Make good_sigevent non-static Bharata B Rao
2007-01-04 9:38 ` [PATCHSET 3][PATCH 4/5][AIO] - AIO completion signal notification Bharata B Rao
2007-01-04 9:40 ` [PATCHSET 3][PATCH 5/5][AIO] - Add listio support Bharata B Rao
2007-01-05 5:32 ` [PATCHSET 4][PATCH 1/1] AIO fallback for pipes, sockets and pollable fds Suparna Bhattacharya
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20061228084408.GH6971@in.ibm.com \
--to=suparna@in.ibm.com \
--cc=akpm@osdl.org \
--cc=drepper@redhat.com \
--cc=jakub@redhat.com \
--cc=linux-aio@kvack.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).