* [PATCH] add support for vectored and async I/O to all simple filesystems
@ 2005-11-01 2:36 Christoph Hellwig
2005-11-01 10:28 ` Miklos Szeredi
2005-11-01 19:20 ` Jamie Lokier
0 siblings, 2 replies; 15+ messages in thread
From: Christoph Hellwig @ 2005-11-01 2:36 UTC (permalink / raw)
To: akpm; +Cc: linux-fsdevel
Every filesystem using generic_file_read/generic_file_write directly
can easily support vectored and async (well at least the API, it's not
async quite yet in mainline) I/O. This patch adds the proper vectors
to all filesystems and switches them to do_sync_read/do_sync_write.
This way we can get rid of plain generic_file_read/generic_file_write
soon and remove one of the many variants of those functions. Long
term I plan to unify aio and vectored support into a single operation
so we can cut down filemap.c to a sane set of these routines.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Index: linux-2.6/drivers/char/raw.c
===================================================================
--- linux-2.6.orig/drivers/char/raw.c 2005-10-31 15:44:42.000000000 +0100
+++ linux-2.6/drivers/char/raw.c 2005-10-31 15:46:29.000000000 +0100
@@ -262,7 +262,7 @@
static struct file_operations raw_fops = {
- .read = generic_file_read,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = raw_file_write,
.aio_write = raw_file_aio_write,
Index: linux-2.6/fs/adfs/file.c
===================================================================
--- linux-2.6.orig/fs/adfs/file.c 2005-10-31 15:44:42.000000000 +0100
+++ linux-2.6/fs/adfs/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -31,11 +31,16 @@
struct file_operations adfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = file_fsync,
- .write = generic_file_write,
.sendfile = generic_file_sendfile,
+
};
struct inode_operations adfs_file_inode_operations = {
Index: linux-2.6/fs/bfs/file.c
===================================================================
--- linux-2.6.orig/fs/bfs/file.c 2005-10-31 15:44:42.000000000 +0100
+++ linux-2.6/fs/bfs/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -19,8 +19,12 @@
struct file_operations bfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6/fs/ext2/file.c
===================================================================
--- linux-2.6.orig/fs/ext2/file.c 2005-10-31 15:44:42.000000000 +0100
+++ linux-2.6/fs/ext2/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -41,8 +41,8 @@
*/
struct file_operations ext2_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.ioctl = ext2_ioctl,
Index: linux-2.6/fs/fuse/file.c
===================================================================
--- linux-2.6.orig/fs/fuse/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/fuse/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -523,8 +523,12 @@
static struct file_operations fuse_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
Index: linux-2.6/fs/hfs/inode.c
===================================================================
--- linux-2.6.orig/fs/hfs/inode.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/hfs/inode.c 2005-10-31 15:46:29.000000000 +0100
@@ -616,8 +616,12 @@
static struct file_operations hfs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6/fs/hfsplus/inode.c
===================================================================
--- linux-2.6.orig/fs/hfsplus/inode.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/hfsplus/inode.c 2005-10-31 15:46:29.000000000 +0100
@@ -302,8 +302,12 @@
static struct file_operations hfsplus_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
.fsync = file_fsync,
Index: linux-2.6/fs/hostfs/hostfs_kern.c
===================================================================
--- linux-2.6.orig/fs/hostfs/hostfs_kern.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/hostfs/hostfs_kern.c 2005-10-31 15:46:29.000000000 +0100
@@ -388,13 +388,13 @@
static struct file_operations hostfs_file_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.readv = generic_file_readv,
.writev = generic_file_writev,
- .write = generic_file_write,
+ .write = do_sync_write,
.mmap = generic_file_mmap,
.open = hostfs_file_open,
.release = NULL,
Index: linux-2.6/fs/jffs/inode-v23.c
===================================================================
--- linux-2.6.orig/fs/jffs/inode-v23.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/jffs/inode-v23.c 2005-10-31 15:46:29.000000000 +0100
@@ -1633,8 +1633,12 @@
{
.open = generic_file_open,
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs_fsync,
Index: linux-2.6/fs/jffs2/file.c
===================================================================
--- linux-2.6.orig/fs/jffs2/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/jffs2/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -42,8 +42,12 @@
{
.llseek = generic_file_llseek,
.open = generic_file_open,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.ioctl = jffs2_ioctl,
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
Index: linux-2.6/fs/jfs/file.c
===================================================================
--- linux-2.6.orig/fs/jfs/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/jfs/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -103,8 +103,8 @@
struct file_operations jfs_file_operations = {
.open = jfs_open,
.llseek = generic_file_llseek,
- .write = generic_file_write,
- .read = generic_file_read,
+ .write = do_sync_write,
+ .read = do_sync_read,
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
Index: linux-2.6/fs/minix/file.c
===================================================================
--- linux-2.6.orig/fs/minix/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/minix/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -17,8 +17,12 @@
struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = minix_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6/fs/ramfs/inode.c
===================================================================
--- linux-2.6.orig/fs/ramfs/inode.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/ramfs/inode.c 2005-10-31 15:46:29.000000000 +0100
@@ -149,8 +149,12 @@
};
struct file_operations ramfs_file_operations = {
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = simple_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6/fs/read_write.c
===================================================================
--- linux-2.6.orig/fs/read_write.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/read_write.c 2005-10-31 15:46:29.000000000 +0100
@@ -20,7 +20,9 @@
struct file_operations generic_ro_fops = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .readv = generic_file_readv,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
};
Index: linux-2.6/fs/sysv/file.c
===================================================================
--- linux-2.6.orig/fs/sysv/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/sysv/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -21,8 +21,12 @@
*/
struct file_operations sysv_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.fsync = sysv_sync_file,
.sendfile = generic_file_sendfile,
Index: linux-2.6/fs/ufs/file.c
===================================================================
--- linux-2.6.orig/fs/ufs/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/ufs/file.c 2005-10-31 15:46:29.000000000 +0100
@@ -43,8 +43,12 @@
struct file_operations ufs_file_operations = {
.llseek = generic_file_llseek,
- .read = generic_file_read,
- .write = generic_file_write,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
+ .aio_read = generic_file_aio_read,
+ .aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = generic_file_open,
.sendfile = generic_file_sendfile,
Index: linux-2.6/fs/reiserfs/file.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/reiserfs/file.c 2005-10-31 15:46:46.000000000 +0100
@@ -1541,22 +1541,18 @@
return res;
}
-static ssize_t reiserfs_aio_write(struct kiocb *iocb, const char __user * buf,
- size_t count, loff_t pos)
-{
- return generic_file_aio_write(iocb, buf, count, pos);
-}
-
struct file_operations reiserfs_file_operations = {
- .read = generic_file_read,
+ .read = do_sync_read,
.write = reiserfs_file_write,
+ .readv = generic_file_readv,
+ .writev = generic_file_writev,
.ioctl = reiserfs_ioctl,
.mmap = generic_file_mmap,
.release = reiserfs_file_release,
.fsync = reiserfs_sync_file,
.sendfile = generic_file_sendfile,
.aio_read = generic_file_aio_read,
- .aio_write = reiserfs_aio_write,
+ .aio_write = generic_file_aio_write,
};
struct inode_operations reiserfs_file_inode_operations = {
Index: linux-2.6/fs/block_dev.c
===================================================================
--- linux-2.6.orig/fs/block_dev.c 2005-10-31 15:44:42.000000000 +0100
+++ linux-2.6/fs/block_dev.c 2005-10-31 15:47:23.000000000 +0100
@@ -796,7 +796,7 @@
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
.write = blkdev_file_write,
.aio_read = generic_file_aio_read,
.aio_write = blkdev_file_aio_write,
Index: linux-2.6/fs/ntfs/file.c
===================================================================
--- linux-2.6.orig/fs/ntfs/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/ntfs/file.c 2005-10-31 15:47:52.000000000 +0100
@@ -2305,7 +2305,7 @@
struct file_operations ntfs_file_ops = {
.llseek = generic_file_llseek, /* Seek inside file. */
- .read = generic_file_read, /* Read from file. */
+ .read = do_sync_read, /* Read from file. */
.aio_read = generic_file_aio_read, /* Async read from file. */
.readv = generic_file_readv, /* Read from file. */
#ifdef NTFS_RW
Index: linux-2.6/fs/qnx4/file.c
===================================================================
--- linux-2.6.orig/fs/qnx4/file.c 2005-10-31 15:44:43.000000000 +0100
+++ linux-2.6/fs/qnx4/file.c 2005-10-31 15:48:59.000000000 +0100
@@ -25,11 +25,15 @@
struct file_operations qnx4_file_operations =
{
.llseek = generic_file_llseek,
- .read = generic_file_read,
+ .read = do_sync_read,
+ .readv = generic_file_readv,
+ .aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
.sendfile = generic_file_sendfile,
#ifdef CONFIG_QNX4FS_RW
- .write = generic_file_write,
+ .write = do_sync_write,
+ .writev = generic_file_writev,
+ .aio_write = generic_file_aio_write,
.fsync = qnx4_sync_file,
#endif
};
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 2:36 [PATCH] add support for vectored and async I/O to all simple filesystems Christoph Hellwig
@ 2005-11-01 10:28 ` Miklos Szeredi
2005-11-01 15:27 ` Christoph Hellwig
2005-11-01 19:20 ` Jamie Lokier
1 sibling, 1 reply; 15+ messages in thread
From: Miklos Szeredi @ 2005-11-01 10:28 UTC (permalink / raw)
To: hch; +Cc: akpm, linux-fsdevel
> Every filesystem using generic_file_read/generic_file_write directly
> can easily support vectored and async (well at least the API, it's not
> async quite yet in mainline) I/O. This patch adds the proper vectors
> to all filesystems and switches them to do_sync_read/do_sync_write.
Do you really need to replace generic_file_read/write with
do_sync_read/write? That seems to only add overhead.
The rest of the patch I agree with.
Miklos
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 10:28 ` Miklos Szeredi
@ 2005-11-01 15:27 ` Christoph Hellwig
2005-11-01 17:19 ` Miklos Szeredi
0 siblings, 1 reply; 15+ messages in thread
From: Christoph Hellwig @ 2005-11-01 15:27 UTC (permalink / raw)
To: Miklos Szeredi; +Cc: hch, akpm, linux-fsdevel
On Tue, Nov 01, 2005 at 11:28:41AM +0100, Miklos Szeredi wrote:
> > Every filesystem using generic_file_read/generic_file_write directly
> > can easily support vectored and async (well at least the API, it's not
> > async quite yet in mainline) I/O. This patch adds the proper vectors
> > to all filesystems and switches them to do_sync_read/do_sync_write.
>
> Do you really need to replace generic_file_read/write with
> do_sync_read/write? That seems to only add overhead.
do_sync_read/write are doing the same thing as generic_file_read/write,
just above the method vectors. Once I have all filesystems support
proper vectored and aio methods do_sync_read/write will go away
completely and upper code will always call the complex methods (
or hopefully just one combined aio/vectored ops with all fancies)
directly.
>
> The rest of the patch I agree with.
>
> Miklos
---end quoted text---
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 15:27 ` Christoph Hellwig
@ 2005-11-01 17:19 ` Miklos Szeredi
2005-11-07 5:00 ` Christoph Hellwig
0 siblings, 1 reply; 15+ messages in thread
From: Miklos Szeredi @ 2005-11-01 17:19 UTC (permalink / raw)
To: hch; +Cc: akpm, linux-fsdevel
> On Tue, Nov 01, 2005 at 11:28:41AM +0100, Miklos Szeredi wrote:
> > > Every filesystem using generic_file_read/generic_file_write directly
> > > can easily support vectored and async (well at least the API, it's not
> > > async quite yet in mainline) I/O. This patch adds the proper vectors
> > > to all filesystems and switches them to do_sync_read/do_sync_write.
> >
> > Do you really need to replace generic_file_read/write with
> > do_sync_read/write? That seems to only add overhead.
>
> do_sync_read/write are doing the same thing as generic_file_read/write,
> just above the method vectors. Once I have all filesystems support
> proper vectored and aio methods do_sync_read/write will go away
> completely and upper code will always call the complex methods (
> or hopefully just one combined aio/vectored ops with all fancies)
> directly.
You mean f_op->read, f_op->write going away completely? That's quite
a bit of work yet.
Until then you are throwing away some performance in the hottest
read/write paths. Is it worth it? I'm not sure. Did you benchmark
the change?
Miklos
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 2:36 [PATCH] add support for vectored and async I/O to all simple filesystems Christoph Hellwig
2005-11-01 10:28 ` Miklos Szeredi
@ 2005-11-01 19:20 ` Jamie Lokier
2005-11-01 20:57 ` Benjamin LaHaise
2005-11-05 0:18 ` Christoph Hellwig
1 sibling, 2 replies; 15+ messages in thread
From: Jamie Lokier @ 2005-11-01 19:20 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: akpm, linux-fsdevel
Christoph Hellwig wrote:
> Every filesystem using generic_file_read/generic_file_write directly
> can easily support vectored and async (well at least the API, it's not
> async quite yet in mainline) I/O.
Does this change mean aio system calls will now succeed, but not
actually be asynchronous?
-- Jamie
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 19:20 ` Jamie Lokier
@ 2005-11-01 20:57 ` Benjamin LaHaise
2005-11-02 11:06 ` Jamie Lokier
2005-11-05 0:18 ` Christoph Hellwig
1 sibling, 1 reply; 15+ messages in thread
From: Benjamin LaHaise @ 2005-11-01 20:57 UTC (permalink / raw)
To: Jamie Lokier; +Cc: Christoph Hellwig, akpm, linux-fsdevel
On Tue, Nov 01, 2005 at 07:20:00PM +0000, Jamie Lokier wrote:
> Christoph Hellwig wrote:
> > Every filesystem using generic_file_read/generic_file_write directly
> > can easily support vectored and async (well at least the API, it's not
> > async quite yet in mainline) I/O.
>
> Does this change mean aio system calls will now succeed, but not
> actually be asynchronous?
Yes, there doesn't seem to be the will to merge the buffered filesystem aio
patches.
-ben
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 20:57 ` Benjamin LaHaise
@ 2005-11-02 11:06 ` Jamie Lokier
2005-11-02 16:21 ` Benjamin LaHaise
0 siblings, 1 reply; 15+ messages in thread
From: Jamie Lokier @ 2005-11-02 11:06 UTC (permalink / raw)
To: Benjamin LaHaise; +Cc: Christoph Hellwig, akpm, linux-fsdevel
Benjamin LaHaise wrote:
> On Tue, Nov 01, 2005 at 07:20:00PM +0000, Jamie Lokier wrote:
> > Christoph Hellwig wrote:
> > > Every filesystem using generic_file_read/generic_file_write directly
> > > can easily support vectored and async (well at least the API, it's not
> > > async quite yet in mainline) I/O.
> >
> > Does this change mean aio system calls will now succeed, but not
> > actually be asynchronous?
>
> Yes, there doesn't seem to be the will to merge the buffered filesystem aio
> patches.
So it means that any program that mustn't block, must now have a
stupid kernel version check to make sure it avoids even trying aio
system calls? I was under the impression that the right thing to do
so far was try them, and when EINVAL is returned, use threads instead.
-- Jamie
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-02 11:06 ` Jamie Lokier
@ 2005-11-02 16:21 ` Benjamin LaHaise
2005-11-02 16:29 ` Matthew Wilcox
0 siblings, 1 reply; 15+ messages in thread
From: Benjamin LaHaise @ 2005-11-02 16:21 UTC (permalink / raw)
To: Jamie Lokier; +Cc: Christoph Hellwig, akpm, linux-fsdevel
On Wed, Nov 02, 2005 at 11:06:30AM +0000, Jamie Lokier wrote:
> So it means that any program that mustn't block, must now have a
> stupid kernel version check to make sure it avoids even trying aio
> system calls? I was under the impression that the right thing to do
> so far was try them, and when EINVAL is returned, use threads instead.
Yes, that is correct.
-ben
--
"Time is what keeps everything from happening all at once." -- John Wheeler
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-02 16:21 ` Benjamin LaHaise
@ 2005-11-02 16:29 ` Matthew Wilcox
2005-11-02 16:45 ` Benjamin LaHaise
2005-11-02 20:31 ` Jamie Lokier
0 siblings, 2 replies; 15+ messages in thread
From: Matthew Wilcox @ 2005-11-02 16:29 UTC (permalink / raw)
To: Benjamin LaHaise; +Cc: Jamie Lokier, Christoph Hellwig, akpm, linux-fsdevel
On Wed, Nov 02, 2005 at 11:21:07AM -0500, Benjamin LaHaise wrote:
> On Wed, Nov 02, 2005 at 11:06:30AM +0000, Jamie Lokier wrote:
> > So it means that any program that mustn't block, must now have a
> > stupid kernel version check to make sure it avoids even trying aio
> > system calls? I was under the impression that the right thing to do
> > so far was try them, and when EINVAL is returned, use threads instead.
>
> Yes, that is correct.
To be fair, the aio system calls were never _guaranteed_ to not block,
were they? ISTR there were various corner cases that would still get
your task blocking while doing an aio submission.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-02 16:29 ` Matthew Wilcox
@ 2005-11-02 16:45 ` Benjamin LaHaise
2005-11-02 20:31 ` Jamie Lokier
1 sibling, 0 replies; 15+ messages in thread
From: Benjamin LaHaise @ 2005-11-02 16:45 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Jamie Lokier, Christoph Hellwig, akpm, linux-fsdevel
On Wed, Nov 02, 2005 at 09:29:04AM -0700, Matthew Wilcox wrote:
> To be fair, the aio system calls were never _guaranteed_ to not block,
> were they? ISTR there were various corner cases that would still get
> your task blocking while doing an aio submission.
They're supposed to not block except for memory allocation, that is the
definition of how the aio api is supposed to work. The patches to fix
that always seem to get met with a "woe! complexity!" response. I'm of
the opinion now that the only way to get the aio api to a usable state
of implementation is to use threads so that the impact on the rest of
the kernel isn't there for anything other than fast paths.
-ben
--
"Time is what keeps everything from happening all at once." -- John Wheeler
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-02 16:29 ` Matthew Wilcox
2005-11-02 16:45 ` Benjamin LaHaise
@ 2005-11-02 20:31 ` Jamie Lokier
2005-11-02 21:04 ` Anton Altaparmakov
1 sibling, 1 reply; 15+ messages in thread
From: Jamie Lokier @ 2005-11-02 20:31 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Benjamin LaHaise, Christoph Hellwig, akpm, linux-fsdevel
Matthew Wilcox wrote:
> On Wed, Nov 02, 2005 at 11:21:07AM -0500, Benjamin LaHaise wrote:
> > On Wed, Nov 02, 2005 at 11:06:30AM +0000, Jamie Lokier wrote:
> > > So it means that any program that mustn't block, must now have a
> > > stupid kernel version check to make sure it avoids even trying aio
> > > system calls? I was under the impression that the right thing to do
> > > so far was try them, and when EINVAL is returned, use threads instead.
> >
> > Yes, that is correct.
>
> To be fair, the aio system calls were never _guaranteed_ to not block,
> were they? ISTR there were various corner cases that would still get
> your task blocking while doing an aio submission.
Could we have some documentation of when those corner cases occur?
The main point of aio, as far as I'm aware, is to avoid the need for
threads (or reduce the number of threads) in programs using I/O that
shouldn't block, particularly when they are latency sensitive too.
If aio has a habit of blocking from time to time, then it may still be
useful, but it would be helpful to know that multiple threads are
still needed to ensure a program (e.g. such as a HTTP or SMB server)
can continue to make progress - and more helpful to know when.
One particular question is: can aio calls block for a long time due to
network delays (e.g. over NFS) and I/O delays (e.g. slow disk or CD),
or are the corner cases restricted to things like paging during memory
allocation, which is unavoidable one way or another anyway?
-- Jamie
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-02 20:31 ` Jamie Lokier
@ 2005-11-02 21:04 ` Anton Altaparmakov
2005-11-02 23:36 ` Jamie Lokier
0 siblings, 1 reply; 15+ messages in thread
From: Anton Altaparmakov @ 2005-11-02 21:04 UTC (permalink / raw)
To: Jamie Lokier
Cc: Matthew Wilcox, Benjamin LaHaise, Christoph Hellwig, akpm,
linux-fsdevel
On Wed, 2 Nov 2005, Jamie Lokier wrote:
> Matthew Wilcox wrote:
> > On Wed, Nov 02, 2005 at 11:21:07AM -0500, Benjamin LaHaise wrote:
> > > On Wed, Nov 02, 2005 at 11:06:30AM +0000, Jamie Lokier wrote:
> > > > So it means that any program that mustn't block, must now have a
> > > > stupid kernel version check to make sure it avoids even trying aio
> > > > system calls? I was under the impression that the right thing to do
> > > > so far was try them, and when EINVAL is returned, use threads instead.
> > >
> > > Yes, that is correct.
> >
> > To be fair, the aio system calls were never _guaranteed_ to not block,
> > were they? ISTR there were various corner cases that would still get
> > your task blocking while doing an aio submission.
>
> Could we have some documentation of when those corner cases occur?
>
> The main point of aio, as far as I'm aware, is to avoid the need for
> threads (or reduce the number of threads) in programs using I/O that
> shouldn't block, particularly when they are latency sensitive too.
>
> If aio has a habit of blocking from time to time, then it may still be
> useful, but it would be helpful to know that multiple threads are
> still needed to ensure a program (e.g. such as a HTTP or SMB server)
> can continue to make progress - and more helpful to know when.
>
> One particular question is: can aio calls block for a long time due to
> network delays (e.g. over NFS) and I/O delays (e.g. slow disk or CD),
> or are the corner cases restricted to things like paging during memory
> allocation, which is unavoidable one way or another anyway?
Yes, of course aio can block and in fact will block arbitrarily for
arbitrary lengths of time. At least at present the implementations of
->aio_read and ->aio_write in the file systems will block left right and
center.
For a start, i_sem is downed which can block.
Then when we get inside readpage or the relevant file write function,
buffers may be allocated for the current page which can block.
Then the filesystem needs to map the buffers if they are not mapped
already and it is possible the filesystem needs to obtain other locks
(again can block here) and even worse the filesystem may need to read data
from disk to determine the mapping information for the buffers. This
obviously is a slow and blocking operation unless your device is a ram
disk.
And in the write case the filesystem may need to allocate blocks on disk
first, which in turn will involve taking locks (and possibly blocking) in
addition to reading/writing metadata to find free blocks that can be
allocated and marking them as allocated. And that of course can involve
on-disk access and hence again blocking.
I am not sure we need documentation for all that. It is kind of obvious
once you sit and think about what a read and a write actually implies.
The only way you can _really_ have guaranteed async io is to queue the io
to a kernel thread work queue and return immediately to the caller. The
only thing you will then block on potentially is allocating memory for the
"queue entry item" and on waiting for the lock to the "queue" so it is
safe to write to it.
And if you do that, it then becomes easy to be truly non-blocking. Just
allocate with GFP_ATOMIC (and perhaps add __GFP_NORETRY and
__GFP_NORECLAIM?) and do a try lock for the queue lock. And if either of
those fails, punt the request and return immediately to the user with error
-EWOULDBLOCK or whatever...
You could even optimise away the queue lock by using an atomic compare and
exchange based queue addition function but that may not be worth the extra
complexity, don't know. I guess the big smp folks may see contention on
that lock... You could at least do the queues and hence their locks per
superblock or something...
Best regards,
Anton
--
Anton Altaparmakov <aia21 at cam.ac.uk> (replace at with @)
Unix Support, Computing Service, University of Cambridge, CB2 3QH, UK
Linux NTFS maintainer / IRC: #ntfs on irc.freenode.net
WWW: http://linux-ntfs.sf.net/ & http://www-stu.christs.cam.ac.uk/~aia21/
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-02 21:04 ` Anton Altaparmakov
@ 2005-11-02 23:36 ` Jamie Lokier
0 siblings, 0 replies; 15+ messages in thread
From: Jamie Lokier @ 2005-11-02 23:36 UTC (permalink / raw)
To: Anton Altaparmakov
Cc: Matthew Wilcox, Benjamin LaHaise, Christoph Hellwig, akpm,
linux-fsdevel
Anton Altaparmakov wrote:
> Yes, of course aio can block and in fact will block arbitrarily for
> arbitrary lengths of time. At least at present the implementations of
> ->aio_read and ->aio_write in the file systems will block left right and
> center.
>
> [examples...]
That's a shame. I was hoping it would offer similar properties to
non-blocking I/O on sockets: something which can indicate that the
resource is unavailable, but allows the application to continue with
other things without needing parallel threads for that.
> The only way you can _really_ have guaranteed async io is to queue the io
> to a kernel thread work queue and return immediately to the caller. The
> only thing you will then block on potentially is allocating memory for the
> "queue entry item" and on waiting for the lock to the "queue" so it is
> safe to write to it.
Since threads with well-defined blocking points can be mechanically
transformed to state machines, it is possible to guarantee async I/O
without needing extra threads. I had wondered if the Linux AIO
implementation did something like that.
However, it is too complex to convert all filesystem code from
blocking to state machines (by hand), and unnecessary. I've thought
about possible AIO implementations many times, and always return to
the idea of having AIOs be state machines which are handled
synchronously and by interrupts, up until a point where a complex
blocking point is reached in a filesystem (which shouldn't happen for
the common read/write cases, but may happen for the uncommon ones),
and then the AIOs convert to work handled by a worker thread spawned
from a pool on demand.
In other words, there are plenty of ways to guarantee properly async
I/O. I'm surprised Linux AIO isn't - that seems to defeat the whole
point of AIO.
-- Jamie
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 19:20 ` Jamie Lokier
2005-11-01 20:57 ` Benjamin LaHaise
@ 2005-11-05 0:18 ` Christoph Hellwig
1 sibling, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2005-11-05 0:18 UTC (permalink / raw)
To: Jamie Lokier; +Cc: akpm, linux-fsdevel
On Tue, Nov 01, 2005 at 07:20:00PM +0000, Jamie Lokier wrote:
> Christoph Hellwig wrote:
> > Every filesystem using generic_file_read/generic_file_write directly
> > can easily support vectored and async (well at least the API, it's not
> > async quite yet in mainline) I/O.
>
> Does this change mean aio system calls will now succeed, but not
> actually be asynchronous?
for buffered I/O: yes. meaning the behaviour on all these obscure
filesystems is the same as on all the ones everyone uses. Whether that
behaviour is good or not is a different question and can easily be
tweaked in a single file (fs/aio.c) for all filesystems now, whereas
previously we were utterly inconsistent.
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH] add support for vectored and async I/O to all simple filesystems
2005-11-01 17:19 ` Miklos Szeredi
@ 2005-11-07 5:00 ` Christoph Hellwig
0 siblings, 0 replies; 15+ messages in thread
From: Christoph Hellwig @ 2005-11-07 5:00 UTC (permalink / raw)
To: Miklos Szeredi; +Cc: hch, akpm, linux-fsdevel
On Tue, Nov 01, 2005 at 06:19:22PM +0100, Miklos Szeredi wrote:
> > do_sync_read/write are doing the same thing as generic_file_read/write,
> > just above the method vectors. Once I have all filesystems support
> > proper vectored and aio methods do_sync_read/write will go away
> > completely and upper code will always call the complex methods (
> > or hopefully just one combined aio/vectored ops with all fancies)
> > directly.
>
> You mean f_op->read, f_op->write going away completely? That's quite
> a bit of work yet.
No, they will have to stay at least until character drivers get their
own set of operation vectors. I just want to make them optional and not
need to implement them in all (or at least most) of the filesystems.
> Until then you are throwing away some performance in the hottest
> read/write paths. Is it worth it? I'm not sure. Did you benchmark
> the change?
there's no difference in dbench and tiobench runs on ppc64. the only
major architecture that could suffer is ia64 because of its horribly
weak branch prediction for indirect calls.
^ permalink raw reply [flat|nested] 15+ messages in thread
end of thread, other threads:[~2005-11-07 5:00 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-11-01 2:36 [PATCH] add support for vectored and async I/O to all simple filesystems Christoph Hellwig
2005-11-01 10:28 ` Miklos Szeredi
2005-11-01 15:27 ` Christoph Hellwig
2005-11-01 17:19 ` Miklos Szeredi
2005-11-07 5:00 ` Christoph Hellwig
2005-11-01 19:20 ` Jamie Lokier
2005-11-01 20:57 ` Benjamin LaHaise
2005-11-02 11:06 ` Jamie Lokier
2005-11-02 16:21 ` Benjamin LaHaise
2005-11-02 16:29 ` Matthew Wilcox
2005-11-02 16:45 ` Benjamin LaHaise
2005-11-02 20:31 ` Jamie Lokier
2005-11-02 21:04 ` Anton Altaparmakov
2005-11-02 23:36 ` Jamie Lokier
2005-11-05 0:18 ` Christoph Hellwig
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).