From: Suparna Bhattacharya <suparna@in.ibm.com>
To: linux-aio@kvack.org, akpm@osdl.org, drepper@redhat.com
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
jakub@redhat.com, mingo@elte.hu
Subject: [PATCHSET 4][PATCH 1/1] AIO fallback for pipes, sockets and pollable fds
Date: Fri, 5 Jan 2007 11:02:22 +0530 [thread overview]
Message-ID: <20070105053222.GA12568@in.ibm.com> (raw)
In-Reply-To: <20061227153855.GA25898@in.ibm.com>
As glibc POSIX AIO switches over completely to using native AIO, it needs
basic AIO support for various file types - including sockets, pipes, etc.
Since userland will no longer be simulating asynchronous behaviour
with threads, it expects the underlying implementation to be asynchronous,
which is still an issue with native Linux AIO.
One (not so appealing) alternative that has been considered in the past is
a fallback path that spawns a kernel thread per AIO request. This in some
sense amounts to pushing the problem down from user to kernel space.
Fortunately we can do better. We can effectively simulate AIO in kernel
using async poll and O_NONBLOCK for all pollable fds, i.e. sockets, pipes
etc.
With this scheme in place, all that needs to be done to add AIO support
for any pollable file type is to make sure that the corresponding
f_op->aio_read/aio_write implements O_NONBLOCK behaviour if called in
aio context, i.e. with an async kiocb. The high level common AIO code
takes care of the rest, by enabling retries for completing the rest of
the IO to be initiated directly via poll wait notifications.
This fallback option should be good enough to get us to working POSIX AIO,
now that filesystem AIO already takes care of ISREG files which do not
support O_NONBLOCK. I have tested this with modified pipetest runs, also
using sockets instead of pipes.
---
linux-2.6.20-rc1-root/fs/aio.c | 54 +++++++++++++++++++++++++++++++++++++
linux-2.6.20-rc1-root/fs/pipe.c | 17 +++++++----
linux-2.6.20-rc1-root/net/socket.c | 6 ++--
3 files changed, 69 insertions(+), 8 deletions(-)
diff -puN fs/aio.c~aio-fallback-nonblock fs/aio.c
--- linux-2.6.20-rc1/fs/aio.c~aio-fallback-nonblock 2007-01-03 19:16:36.000000000 +0530
+++ linux-2.6.20-rc1-root/fs/aio.c 2007-01-05 10:29:52.000000000 +0530
@@ -30,6 +30,7 @@
#include <linux/highmem.h>
#include <linux/workqueue.h>
#include <linux/security.h>
+#include <linux/poll.h>
#include <linux/eventpoll.h>
#include <asm/kmap_types.h>
@@ -1315,6 +1316,42 @@ static void aio_advance_iovec(struct kio
BUG_ON(ret > 0 && iocb->ki_left == 0);
}
+/* Wrapper structure used by poll queuing: lets the queue callback map a poll_table back to its iocb */
+struct aio_pqueue {
+ poll_table pt; /* handed to f_op->poll(); container_of() anchor in the queue callback */
+ struct kiocb *iocb; /* the AIO request being set up for retry */
+};
+
+static int aio_cancel_wait(struct kiocb *iocb, struct io_event *event)
+{ /* ki_cancel hook installed by aio_poll_table_queue_proc(); always returns 0 */
+ wait_queue_head_t *wq = (wait_queue_head_t *)iocb->private; /* typedef, not a struct tag */
+ if (wq) /* non-NULL once the iocb has been queued on a poll wait queue */
+ wake_up(wq);
+ event->res = iocb->ki_nbytes - iocb->ki_left; /* total requested minus what is still left */
+ event->res2 = 0;
+ /* drop the cancel reference */
+ aio_put_req(iocb);
+ return 0;
+}
+
+/* Sets things up for a readiness event to trigger the iocb's retry; registered as the poll_table queue callback via init_poll_funcptr() */
+static void aio_poll_table_queue_proc(struct file *file,
+ wait_queue_head_t *whead, poll_table *pt)
+{
+ struct kiocb *iocb = container_of(pt, struct aio_pqueue, pt)->iocb; /* recover the iocb from the wrapper */
+
+ if (unlikely(iocb->private && iocb->ki_dtor)) {
+ /* FIXME: We really shouldn't have to do this */
+ /* the siocb allocation in socket.c is unused AFAIK */
+ iocb->ki_dtor(iocb);
+ iocb->ki_dtor = NULL;
+ }
+
+ iocb->private = whead; /* read back by aio_cancel_wait(); only the latest whead is kept - NOTE(review): confirm ->poll registers a single queue */
+ iocb->ki_cancel = aio_cancel_wait;
+ prepare_to_wait(whead, &iocb->ki_wait.wait, 0); /* put the iocb's wait entry on whead, state argument 0 */
+}
+
static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
{
struct file *file = iocb->ki_filp;
@@ -1334,6 +1371,7 @@ static ssize_t aio_rw_vect_retry(struct
opcode = IOCB_CMD_PWRITEV;
}
+ready:
do {
ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
iocb->ki_nr_segs - iocb->ki_cur_seg,
@@ -1352,6 +1390,22 @@ static ssize_t aio_rw_vect_retry(struct
if ((ret == 0) || (iocb->ki_left == 0))
ret = iocb->ki_nbytes - iocb->ki_left;
+ if (ret == -EAGAIN && file->f_op->poll) {
+ /* This means fop->aio_read implements O_NONBLOCK behaviour */
+ /* Let us try to simulate aio retries using ->poll */
+ struct aio_pqueue pollq = {.iocb = iocb};
+ int events = (opcode == IOCB_CMD_PWRITEV) ?
+ POLLOUT | POLLERR | POLLHUP :
+ POLLIN | POLLERR | POLLHUP;
+
+ init_poll_funcptr(&pollq.pt, aio_poll_table_queue_proc);
+ ret = file->f_op->poll(file, &pollq.pt);
+ if (ret >= 0) {
+ if (ret & events)
+ goto ready;
+ ret = -EIOCBRETRY;
+ }
+ }
return ret;
}
diff -puN net/socket.c~aio-fallback-nonblock net/socket.c
--- linux-2.6.20-rc1/net/socket.c~aio-fallback-nonblock 2007-01-03 19:16:36.000000000 +0530
+++ linux-2.6.20-rc1-root/net/socket.c 2007-01-03 19:16:36.000000000 +0530
@@ -701,7 +701,8 @@ static ssize_t do_sock_read(struct msghd
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+ msg->msg_flags = ((file->f_flags & O_NONBLOCK) || !is_sync_kiocb(iocb))
+ ? MSG_DONTWAIT : 0;
return __sock_recvmsg(iocb, sock, msg, size, msg->msg_flags);
}
@@ -741,7 +742,8 @@ static ssize_t do_sock_write(struct msgh
msg->msg_controllen = 0;
msg->msg_iov = (struct iovec *)iov;
msg->msg_iovlen = nr_segs;
- msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
+ msg->msg_flags = ((file->f_flags & O_NONBLOCK) || !is_sync_kiocb(iocb))
+ ? MSG_DONTWAIT : 0;
if (sock->type == SOCK_SEQPACKET)
msg->msg_flags |= MSG_EOR;
diff -puN fs/pipe.c~aio-fallback-nonblock fs/pipe.c
--- linux-2.6.20-rc1/fs/pipe.c~aio-fallback-nonblock 2007-01-03 19:16:36.000000000 +0530
+++ linux-2.6.20-rc1-root/fs/pipe.c 2007-01-03 19:16:36.000000000 +0530
@@ -226,14 +226,16 @@ pipe_read(struct kiocb *iocb, const stru
struct pipe_inode_info *pipe;
int do_wakeup;
ssize_t ret;
- struct iovec *iov = (struct iovec *)_iov;
+ struct iovec iov_array[nr_segs];
+ struct iovec *iov = iov_array;
size_t total_len;
- total_len = iov_length(iov, nr_segs);
+ total_len = iov_length(_iov, nr_segs);
/* Null read succeeds. */
if (unlikely(total_len == 0))
return 0;
+ memcpy(iov, _iov, nr_segs * sizeof(struct iovec));
do_wakeup = 0;
ret = 0;
mutex_lock(&inode->i_mutex);
@@ -302,7 +304,8 @@ redo:
*/
if (ret)
break;
- if (filp->f_flags & O_NONBLOCK) {
+ if (filp->f_flags & O_NONBLOCK ||
+ !is_sync_kiocb(iocb)) {
ret = -EAGAIN;
break;
}
@@ -339,15 +342,17 @@ pipe_write(struct kiocb *iocb, const str
struct pipe_inode_info *pipe;
ssize_t ret;
int do_wakeup;
- struct iovec *iov = (struct iovec *)_iov;
+ struct iovec iov_array[nr_segs];
+ struct iovec *iov = iov_array;
size_t total_len;
ssize_t chars;
- total_len = iov_length(iov, nr_segs);
+ total_len = iov_length(_iov, nr_segs);
/* Null write succeeds. */
if (unlikely(total_len == 0))
return 0;
+ memcpy(iov, _iov, nr_segs * sizeof(struct iovec));
do_wakeup = 0;
ret = 0;
mutex_lock(&inode->i_mutex);
@@ -473,7 +478,7 @@ redo2:
}
if (bufs < PIPE_BUFFERS)
continue;
- if (filp->f_flags & O_NONBLOCK) {
+ if (filp->f_flags & O_NONBLOCK || !is_sync_kiocb(iocb)) {
if (!ret)
ret = -EAGAIN;
break;
_
--
Suparna Bhattacharya (suparna@in.ibm.com)
Linux Technology Center
IBM Software Lab, India
--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org. For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>
prev parent reply other threads:[~2007-01-05 5:32 UTC|newest]
Thread overview: 60+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-12-27 15:38 [RFC] Heads up on a series of AIO patchsets Suparna Bhattacharya
2006-12-27 16:25 ` Christoph Hellwig
2006-12-27 16:55 ` Ingo Molnar
2006-12-27 17:18 ` Ingo Molnar
2006-12-28 11:41 ` Evgeniy Polyakov
2007-01-02 21:38 ` Dan Williams
2007-01-03 13:35 ` Evgeniy Polyakov
2006-12-28 8:23 ` [PATCHSET 1][PATCH 0/6] Filesystem AIO read/write Suparna Bhattacharya
2006-12-28 8:34 ` [FSAIO][PATCH 1/6] Add a wait queue parameter to the wait_bit action routine Suparna Bhattacharya
2006-12-28 8:46 ` Suparna Bhattacharya
2006-12-28 8:36 ` [FSAIO][PATCH 2/8] Rename __lock_page to lock_page_slow Suparna Bhattacharya
2006-12-28 8:39 ` [FSAIO][PATCH 3/8] Routines to initialize and test a wait bit key Suparna Bhattacharya
2006-12-28 22:42 ` Andrew Morton
2006-12-28 8:39 ` [FSAIO][PATCH 4/8] Add a default io wait bit field in task struct Suparna Bhattacharya
2006-12-28 8:40 ` [FSAIO][PATCH 5/8] Enable wait bit based filtered wakeups to work for AIO Suparna Bhattacharya
2006-12-28 8:41 ` [FSAIO][PATCH 6/8] Enable asynchronous wait page and lock page Suparna Bhattacharya
2006-12-28 11:55 ` Christoph Hellwig
2006-12-28 14:47 ` Suparna Bhattacharya
2007-01-02 14:26 ` Christoph Hellwig
2007-01-04 6:50 ` Nick Piggin
2006-12-28 8:42 ` [FSAIO][PATCH 7/8] Filesystem AIO read Suparna Bhattacharya
2006-12-28 11:57 ` Christoph Hellwig
2006-12-28 14:15 ` Christoph Hellwig
2006-12-28 15:18 ` Suparna Bhattacharya
2007-01-02 14:29 ` Christoph Hellwig
2006-12-28 16:22 ` Jan Engelhardt
2006-12-28 16:56 ` Randy Dunlap
2006-12-28 8:44 ` [FSAIO][PATCH 8/8] AIO O_SYNC filesystem write Suparna Bhattacharya
2006-12-28 9:52 ` [PATCHSET 1][PATCH 0/6] Filesystem AIO read/write Ingo Molnar
2006-12-28 22:53 ` Andrew Morton
2007-01-03 22:15 ` Andrew Morton
2007-01-04 4:56 ` Suparna Bhattacharya
2007-01-04 5:51 ` Nick Piggin
2007-01-04 6:26 ` Suparna Bhattacharya
2007-01-04 6:50 ` Nick Piggin
2007-01-04 11:24 ` Suparna Bhattacharya
2007-01-05 4:56 ` Nick Piggin
2007-01-04 17:02 ` Andrew Morton
2007-01-04 17:49 ` Jens Axboe
2007-01-05 6:28 ` Suparna Bhattacharya
2007-01-05 7:02 ` Jens Axboe
2007-01-05 8:08 ` Suparna Bhattacharya
2007-01-05 8:32 ` Jens Axboe
2007-01-10 5:44 ` Suparna Bhattacharya
2007-01-11 1:08 ` Andrew Morton
2007-01-11 3:13 ` Suparna Bhattacharya
2007-01-11 4:52 ` Andrew Morton
2007-01-02 23:56 ` [RFC] Heads up on a series of AIO patchsets Zach Brown
[not found] ` <6f703f960701021640y444bc537w549fd6d74f3e9529@mail.gmail.com>
[not found] ` <A85B8249-FC4E-4612-8B28-02BC680DC812@oracle.com>
2007-01-03 1:18 ` Kent Overstreet
2007-01-04 20:33 ` Pavel Machek
2007-01-03 5:03 ` Suparna Bhattacharya
2007-01-05 0:36 ` Zach Brown
2007-01-03 7:23 ` [PATCHSET 2][PATCH 1/1] Combining epoll and disk file AIO Suparna Bhattacharya
2007-01-04 9:27 ` [PATCHSET 3][PATCH 0/5][AIO] - AIO completion signal notification v4 Bharata B Rao
2007-01-04 9:30 ` [PATCHSET 3][PATCH 1/5][AIO] - Rework compat_sys_io_submit Bharata B Rao
2007-01-04 9:32 ` [PATCHSET 3][PATCH 2/5][AIO] - fix aio.h includes Bharata B Rao
2007-01-04 9:34 ` [PATCHSET 3][PATCH 3/5][AIO] - Make good_sigevent non-static Bharata B Rao
2007-01-04 9:38 ` [PATCHSET 3][PATCH 4/5][AIO] - AIO completion signal notification Bharata B Rao
2007-01-04 9:40 ` [PATCHSET 3][PATCH 5/5][AIO] - Add listio support Bharata B Rao
2007-01-05 5:32 ` Suparna Bhattacharya [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070105053222.GA12568@in.ibm.com \
--to=suparna@in.ibm.com \
--cc=akpm@osdl.org \
--cc=drepper@redhat.com \
--cc=jakub@redhat.com \
--cc=linux-aio@kvack.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).