From: Kent Overstreet <koverstreet@google.com>
To: Zach Brown <zab@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-aio@kvack.org,
linux-fsdevel@vger.kernel.org, bcrl@kvack.org, jmoyer@redhat.com,
axboe@kernel.dk, viro@zeniv.linux.org.uk
Subject: Re: [PATCH 14/25] aio: Make aio_read_evt() more efficient
Date: Thu, 29 Nov 2012 16:20:52 -0800 [thread overview]
Message-ID: <20121130002052.GM15094@google.com> (raw)
In-Reply-To: <20121129003816.GJ18574@lenny.home.zabbo.net>
On Wed, Nov 28, 2012 at 04:38:16PM -0800, Zach Brown wrote:
> As mentioned offlist: we don't want to be blocking under
> TASK_INTERRUPTIBLE. Is the plan to do a non-blocking check and pop
> outside the wait loop to do a blocking copy?
Here's the latest version that I posted on IRC earlier:
commit 913ff32bbd4de15a87b07a87ac196e978bc29e17
Author: Kent Overstreet <koverstreet@google.com>
Date: Thu Nov 29 14:12:40 2012 -0800
aio: Make aio_read_evt() more efficient
Previously, aio_read_evt() pulled a single completion off the
ringbuffer at a time, locking and unlocking each time.
Changed it to pull off as many events as it can at a time, and copy them
directly to userspace.
This also fixes a bug where if copying the event to userspace failed,
we'd lose the event.
Signed-off-by: Kent Overstreet <koverstreet@google.com>
diff --git a/fs/aio.c b/fs/aio.c
index 46e6d30..5eca2a4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -63,7 +63,7 @@ struct aio_ring_info {
unsigned long mmap_size;
struct page **ring_pages;
- spinlock_t ring_lock;
+ struct mutex ring_lock;
long nr_pages;
unsigned nr, tail;
@@ -341,7 +341,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
atomic_set(&ctx->users, 2);
atomic_set(&ctx->dead, 0);
spin_lock_init(&ctx->ctx_lock);
- spin_lock_init(&ctx->ring_info.ring_lock);
+ mutex_init(&ctx->ring_info.ring_lock);
init_waitqueue_head(&ctx->wait);
INIT_LIST_HEAD(&ctx->active_reqs);
@@ -746,149 +746,138 @@ put_rq:
}
EXPORT_SYMBOL(aio_complete);
-/* aio_read_evt
- * Pull an event off of the ioctx's event ring. Returns the number of
- * events fetched (0 or 1 ;-)
- * FIXME: make this use cmpxchg.
- * TODO: make the ringbuffer user mmap()able (requires FIXME).
+/* aio_read_events
+ * Pull an event off of the ioctx's event ring. Returns the number of
+ * events fetched
*/
-static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
+static int aio_read_events(struct kioctx *ctx, struct io_event __user *event,
+ long nr, unsigned *head)
{
- struct aio_ring_info *info = &ioctx->ring_info;
+ struct aio_ring_info *info = &ctx->ring_info;
struct aio_ring *ring;
- unsigned long head;
- int ret = 0;
+ unsigned pos;
+ int ret = 0, copy_ret;
- ring = kmap_atomic(info->ring_pages[0]);
- pr_debug("h%u t%u m%u\n", ring->head, ring->tail, ring->nr);
+ pr_debug("h%u t%u m%u\n", *head, info->tail, info->nr);
- if (ring->head == ring->tail)
- goto out;
+ while (ret < nr) {
+ unsigned i = (*head < info->tail ? info->tail : info->nr) - *head;
+ struct io_event *ev;
+ struct page *page;
+
+ if (*head == info->tail)
+ break;
+
+ i = min_t(int, i, nr - ret);
+ i = min_t(int, i, AIO_EVENTS_PER_PAGE -
+ ((*head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
+
+ pos = *head + AIO_EVENTS_OFFSET;
+ page = info->ring_pages[pos / AIO_EVENTS_PER_PAGE];
+ pos %= AIO_EVENTS_PER_PAGE;
- spin_lock(&info->ring_lock);
-
- head = ring->head % info->nr;
- if (head != ring->tail) {
- struct io_event *evp = aio_ring_event(info, head);
- *ent = *evp;
- head = (head + 1) % info->nr;
- smp_mb(); /* finish reading the event before updatng the head */
- ring->head = head;
- ret = 1;
- put_aio_ring_event(evp);
+ ev = kmap(page);
+ copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * i);
+ kunmap(page);
+
+ if (unlikely(copy_ret))
+ return -EFAULT;
+
+ ret += i;
+ *head += i;
+ *head %= info->nr;
}
- spin_unlock(&info->ring_lock);
-out:
+ smp_mb(); /* finish reading the event before updating the head */
+
+ ring = kmap_atomic(info->ring_pages[0]);
+ ring->head = *head;
kunmap_atomic(ring);
- pr_debug("%d h%u t%u\n", ret, ring->head, ring->tail);
+
+ pr_debug("%d h%u t%u\n", ret, *head, info->tail);
+
return ret;
}
static int read_events(struct kioctx *ctx,
- long min_nr, long nr,
- struct io_event __user *event,
- struct timespec __user *timeout)
+ long min_nr, long nr,
+ struct io_event __user *event,
+ struct timespec __user *timeout)
{
DEFINE_WAIT(wait);
+ struct aio_ring_info *info = &ctx->ring_info;
+ struct aio_ring *ring;
struct hrtimer_sleeper t;
+ unsigned head;
size_t i = 0;
- int ret;
- struct io_event ent;
+ int ret = 0;
- /* needed to zero any padding within an entry (there shouldn't be
- * any, but C is fun!
- */
- memset(&ent, 0, sizeof(ent));
- ret = 0;
- while (likely(i < nr)) {
- ret = aio_read_evt(ctx, &ent);
- if (unlikely(ret <= 0))
- break;
+ hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init_sleeper(&t, current);
- pr_debug("%Lx %Lx %Lx %Lx\n",
- ent.data, ent.obj, ent.res, ent.res2);
+ mutex_lock(&info->ring_lock);
- /* Could we split the check in two? */
- ret = -EFAULT;
- if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
- pr_debug("lost an event due to EFAULT.\n");
+ while (i < nr) {
+ ring = kmap_atomic(info->ring_pages[0]);
+ head = ring->head;
+ kunmap_atomic(ring);
+retry:
+ ret = aio_read_events(ctx, event + i, nr - i, &head);
+ if (ret < 0)
break;
- }
- ret = 0;
- /* Good, event copied to userland, update counts. */
- event ++;
- i ++;
- }
-
- if (min_nr <= i)
- return i;
- if (ret)
- return ret;
-
- /* End fast path */
+ i += ret;
+ if (i >= min_nr)
+ break;
+ if (unlikely(atomic_read(&ctx->dead))) {
+ ret = -EINVAL;
+ break;
+ }
+ if (!t.task) /* Only check after read evt */
+ break;
- hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- hrtimer_init_sleeper(&t, current);
+ if (timeout) {
+ struct timespec ts;
- if (timeout) {
- struct timespec ts;
+ if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) {
+ ret = -EFAULT;
+ break;
+ }
- if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) {
- ret = -EFAULT;
- goto out;
+ timeout = NULL;
+ hrtimer_start_range_ns(&t.timer, timespec_to_ktime(ts),
+ current->timer_slack_ns,
+ HRTIMER_MODE_REL);
}
- hrtimer_start_range_ns(&t.timer, timespec_to_ktime(ts),
- current->timer_slack_ns, HRTIMER_MODE_REL);
- }
-
- while (likely(i < nr)) {
prepare_to_wait_exclusive(&ctx->wait, &wait,
TASK_INTERRUPTIBLE);
- do {
- ret = aio_read_evt(ctx, &ent);
- if (ret)
- break;
- if (min_nr <= i)
- break;
- if (unlikely(atomic_read(&ctx->dead))) {
- ret = -EINVAL;
- break;
- }
- if (!t.task) /* Only check after read evt */
- break;
- /* Try to only show up in io wait if there are ops
- * in flight */
- if (atomic_read(&ctx->reqs_active))
- io_schedule();
- else
- schedule();
- if (signal_pending(current)) {
- ret = -EINTR;
- break;
- }
- /*ret = aio_read_evt(ctx, &ent);*/
- } while (1) ;
+ if (head != info->tail) {
+ __set_current_state(TASK_RUNNING);
+ goto retry;
+ }
- finish_wait(&ctx->wait, &wait);
+ mutex_unlock(&info->ring_lock);
- if (unlikely(ret <= 0))
- break;
+ /* Try to only show up in io wait if there are ops in flight */
+ if (atomic_read(&ctx->reqs_active))
+ io_schedule();
+ else
+ schedule();
- ret = -EFAULT;
- if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
- pr_debug("lost an event due to EFAULT.\n");
- break;
+ if (signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
}
- /* Good, event copied to userland, update counts. */
- event ++;
- i ++;
+ __set_current_state(TASK_RUNNING);
+ mutex_lock(&info->ring_lock);
}
+
+ mutex_unlock(&info->ring_lock);
out:
+ finish_wait(&ctx->wait, &wait);
hrtimer_cancel(&t.timer);
destroy_hrtimer_on_stack(&t.timer);
return i ? i : ret;
next prev parent reply other threads:[~2012-11-30 0:20 UTC|newest]
Thread overview: 95+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-11-28 16:43 [PATCH 00/25] AIO performance improvements/cleanups Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 01/25] mm: remove old aio use_mm() comment Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 02/25] aio: remove dead code from aio.h Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 03/25] gadget: remove only user of aio retry Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 04/25] aio: remove retry-based AIO Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 05/25] char: add aio_{read,write} to /dev/{null,zero} Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 06/25] aio: Kill return value of aio_complete() Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 07/25] aio: kiocb_cancel() Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-29 0:07 ` Zach Brown
2012-11-29 0:58 ` Kent Overstreet
2012-11-29 0:58 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 08/25] aio: Move private stuff out of aio.h Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 09/25] aio: dprintk() -> pr_debug() Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 10/25] aio: do fget() after aio_get_req() Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 11/25] aio: Make aio_put_req() lockless Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 12/25] aio: Refcounting cleanup Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-29 0:17 ` Zach Brown
2012-11-29 0:17 ` Zach Brown
2012-11-29 1:12 ` Kent Overstreet
2012-11-29 1:12 ` Kent Overstreet
2012-11-29 0:46 ` Benjamin LaHaise
2012-11-29 0:46 ` Benjamin LaHaise
2012-11-29 1:38 ` Kent Overstreet
2012-11-29 1:38 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 13/25] aio: Convert read_events() to hrtimers Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-29 0:24 ` Zach Brown
2012-11-29 0:24 ` Zach Brown
2012-11-29 1:05 ` Kent Overstreet
2012-11-29 1:05 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 14/25] aio: Make aio_read_evt() more efficient Kent Overstreet
2012-11-29 0:38 ` Zach Brown
2012-11-29 0:38 ` Zach Brown
2012-11-29 19:31 ` Kent Overstreet
2012-11-29 19:31 ` Kent Overstreet
2012-11-30 0:20 ` Kent Overstreet [this message]
2012-11-28 16:43 ` [PATCH 15/25] aio: Use cancellation list lazily Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 16/25] aio: Change reqs_active to include unreaped completions Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 17/25] aio: Kill batch allocation Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 18/25] aio: Kill struct aio_ring_info Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 19/25] aio: Give shared kioctx fields their own cachelines Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 20/25] aio: reqs_active -> reqs_available Kent Overstreet
2012-11-28 16:43 ` [PATCH 21/25] aio: percpu reqs_available Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 22/25] Generic dynamic per cpu refcounting Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-29 18:45 ` Andi Kleen
2012-11-29 18:45 ` Andi Kleen
2012-11-29 18:57 ` Kent Overstreet
2012-11-29 18:57 ` Kent Overstreet
2012-11-29 18:59 ` Andi Kleen
2012-11-29 19:12 ` Kent Overstreet
2012-11-29 19:12 ` Kent Overstreet
2012-11-29 19:20 ` Andi Kleen
2012-11-29 19:20 ` Andi Kleen
2012-11-29 19:29 ` Kent Overstreet
2012-11-29 19:29 ` Kent Overstreet
2012-11-29 19:34 ` Benjamin LaHaise
2012-11-29 19:34 ` Benjamin LaHaise
2012-11-29 20:22 ` Kent Overstreet
2012-11-29 20:42 ` Andi Kleen
2012-11-29 20:45 ` Kent Overstreet
2012-11-29 20:45 ` Kent Overstreet
2012-11-29 20:54 ` Andi Kleen
2012-11-29 20:54 ` Andi Kleen
2012-11-29 20:59 ` Kent Overstreet
2012-11-29 21:57 ` Jamie Lokier
2012-11-29 21:57 ` Jamie Lokier
2012-11-28 16:43 ` [PATCH 23/25] aio: Percpu ioctx refcount Kent Overstreet
2012-11-28 16:43 ` [PATCH 24/25] aio: use xchg() instead of completion_lock Kent Overstreet
2012-11-28 16:43 ` [PATCH 25/25] aio: Don't include aio.h in sched.h Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-29 0:03 ` [PATCH 00/25] AIO performance improvements/cleanups Zach Brown
2012-11-29 0:03 ` Zach Brown
2012-11-29 19:01 ` Kent Overstreet
2012-11-29 19:01 ` Kent Overstreet
-- strict thread matches above, loose matches on Subject: below --
2012-11-28 3:19 Kent Overstreet
2012-11-28 3:19 ` [PATCH 14/25] aio: Make aio_read_evt() more efficient Kent Overstreet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20121130002052.GM15094@google.com \
--to=koverstreet@google.com \
--cc=axboe@kernel.dk \
--cc=bcrl@kvack.org \
--cc=jmoyer@redhat.com \
--cc=linux-aio@kvack.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=viro@zeniv.linux.org.uk \
--cc=zab@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.