All of lore.kernel.org
 help / color / mirror / Atom feed
From: Kent Overstreet <koverstreet@google.com>
To: Zach Brown <zab@redhat.com>
Cc: linux-kernel@vger.kernel.org, linux-aio@kvack.org,
	linux-fsdevel@vger.kernel.org, bcrl@kvack.org, jmoyer@redhat.com,
	axboe@kernel.dk, viro@zeniv.linux.org.uk
Subject: Re: [PATCH 14/25] aio: Make aio_read_evt() more efficient
Date: Thu, 29 Nov 2012 16:20:52 -0800	[thread overview]
Message-ID: <20121130002052.GM15094@google.com> (raw)
In-Reply-To: <20121129003816.GJ18574@lenny.home.zabbo.net>

On Wed, Nov 28, 2012 at 04:38:16PM -0800, Zach Brown wrote:
> As mentioned offlist: we don't want to be blocking under
> TASK_INTERRUPTIBLE.  Is the plan to do a non-blocking check and pop
> outside the wait loop to do a blocking copy?

Here's the latest version that I posted on irc earlier:


commit 913ff32bbd4de15a87b07a87ac196e978bc29e17
Author: Kent Overstreet <koverstreet@google.com>
Date:   Thu Nov 29 14:12:40 2012 -0800

    aio: Make aio_read_evt() more efficient
    
    Previously, aio_read_evt() pulled a single completion off the
    ringbuffer at a time, locking and unlocking each time.
    
    Changed it to pull off as many events as it can at a time, and copy them
    directly to userspace.
    
    This also fixes a bug where if copying the event to userspace failed,
    we'd lose the event.
    
    Signed-off-by: Kent Overstreet <koverstreet@google.com>

diff --git a/fs/aio.c b/fs/aio.c
index 46e6d30..5eca2a4 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -63,7 +63,7 @@ struct aio_ring_info {
 	unsigned long		mmap_size;
 
 	struct page		**ring_pages;
-	spinlock_t		ring_lock;
+	struct mutex		ring_lock;
 	long			nr_pages;
 
 	unsigned		nr, tail;
@@ -341,7 +341,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	atomic_set(&ctx->users, 2);
 	atomic_set(&ctx->dead, 0);
 	spin_lock_init(&ctx->ctx_lock);
-	spin_lock_init(&ctx->ring_info.ring_lock);
+	mutex_init(&ctx->ring_info.ring_lock);
 	init_waitqueue_head(&ctx->wait);
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
@@ -746,149 +746,138 @@ put_rq:
 }
 EXPORT_SYMBOL(aio_complete);
 
-/* aio_read_evt
- *	Pull an event off of the ioctx's event ring.  Returns the number of 
- *	events fetched (0 or 1 ;-)
- *	FIXME: make this use cmpxchg.
- *	TODO: make the ringbuffer user mmap()able (requires FIXME).
+/* aio_read_events
+ *	Pull events off of the ioctx's event ring.  Returns the number of
+ *	events fetched, or -EFAULT if copying an event to userspace fails.
  */
-static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
+static int aio_read_events(struct kioctx *ctx, struct io_event __user *event,
+			   long nr, unsigned *head)
 {
-	struct aio_ring_info *info = &ioctx->ring_info;
+	struct aio_ring_info *info = &ctx->ring_info;
 	struct aio_ring *ring;
-	unsigned long head;
-	int ret = 0;
+	unsigned pos;
+	int ret = 0, copy_ret;
 
-	ring = kmap_atomic(info->ring_pages[0]);
-	pr_debug("h%u t%u m%u\n", ring->head, ring->tail, ring->nr);
+	pr_debug("h%u t%u m%u\n", *head, info->tail, info->nr);
 
-	if (ring->head == ring->tail)
-		goto out;
+	while (ret < nr) {
+		unsigned i = (*head < info->tail ? info->tail : info->nr) - *head;
+		struct io_event *ev;
+		struct page *page;
+
+		if (*head == info->tail)
+			break;
+
+		i = min_t(int, i, nr - ret);
+		i = min_t(int, i, AIO_EVENTS_PER_PAGE -
+			  ((*head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
+
+		pos = *head + AIO_EVENTS_OFFSET;
+		page = info->ring_pages[pos / AIO_EVENTS_PER_PAGE];
+		pos %= AIO_EVENTS_PER_PAGE;
 
-	spin_lock(&info->ring_lock);
-
-	head = ring->head % info->nr;
-	if (head != ring->tail) {
-		struct io_event *evp = aio_ring_event(info, head);
-		*ent = *evp;
-		head = (head + 1) % info->nr;
-		smp_mb(); /* finish reading the event before updatng the head */
-		ring->head = head;
-		ret = 1;
-		put_aio_ring_event(evp);
+		ev = kmap(page);
+		copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * i);
+		kunmap(page);
+
+		if (unlikely(copy_ret))
+			return -EFAULT;
+
+		ret += i;
+		*head += i;
+		*head %= info->nr;
 	}
-	spin_unlock(&info->ring_lock);
 
-out:
+	smp_mb(); /* finish reading the event before updating the head */
+
+	ring = kmap_atomic(info->ring_pages[0]);
+	ring->head = *head;
 	kunmap_atomic(ring);
-	pr_debug("%d  h%u t%u\n", ret, ring->head, ring->tail);
+
+	pr_debug("%d  h%u t%u\n", ret, *head, info->tail);
+
 	return ret;
 }
 
 static int read_events(struct kioctx *ctx,
-			long min_nr, long nr,
-			struct io_event __user *event,
-			struct timespec __user *timeout)
+		       long min_nr, long nr,
+		       struct io_event __user *event,
+		       struct timespec __user *timeout)
 {
 	DEFINE_WAIT(wait);
+	struct aio_ring_info *info = &ctx->ring_info;
+	struct aio_ring *ring;
 	struct hrtimer_sleeper t;
+	unsigned head;
 	size_t i = 0;
-	int ret;
-	struct io_event		ent;
+	int ret = 0;
 
-	/* needed to zero any padding within an entry (there shouldn't be 
-	 * any, but C is fun!
-	 */
-	memset(&ent, 0, sizeof(ent));
-	ret = 0;
-	while (likely(i < nr)) {
-		ret = aio_read_evt(ctx, &ent);
-		if (unlikely(ret <= 0))
-			break;
+	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	hrtimer_init_sleeper(&t, current);
 
-		pr_debug("%Lx %Lx %Lx %Lx\n",
-			 ent.data, ent.obj, ent.res, ent.res2);
+	mutex_lock(&info->ring_lock);
 
-		/* Could we split the check in two? */
-		ret = -EFAULT;
-		if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
-			pr_debug("lost an event due to EFAULT.\n");
+	while (i < nr) {
+		ring = kmap_atomic(info->ring_pages[0]);
+		head = ring->head;
+		kunmap_atomic(ring);
+retry:
+		ret = aio_read_events(ctx, event + i, nr - i, &head);
+		if (ret < 0)
 			break;
-		}
-		ret = 0;
 
-		/* Good, event copied to userland, update counts. */
-		event ++;
-		i ++;
-	}
-
-	if (min_nr <= i)
-		return i;
-	if (ret)
-		return ret;
-
-	/* End fast path */
+		i += ret;
+		if (i >= min_nr)
+			break;
+		if (unlikely(atomic_read(&ctx->dead))) {
+			ret = -EINVAL;
+			break;
+		}
+		if (!t.task)	/* Only check after read evt */
+			break;
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	hrtimer_init_sleeper(&t, current);
+		if (timeout) {
+			struct timespec	ts;
 
-	if (timeout) {
-		struct timespec	ts;
+			if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) {
+				ret = -EFAULT;
+				break;
+			}
 
-		if (unlikely(copy_from_user(&ts, timeout, sizeof(ts)))) {
-			ret = -EFAULT;
-			goto out;
+			timeout = NULL;
+			hrtimer_start_range_ns(&t.timer, timespec_to_ktime(ts),
+					       current->timer_slack_ns,
+					       HRTIMER_MODE_REL);
 		}
 
-		hrtimer_start_range_ns(&t.timer, timespec_to_ktime(ts),
-				       current->timer_slack_ns, HRTIMER_MODE_REL);
-	}
-
-	while (likely(i < nr)) {
 		prepare_to_wait_exclusive(&ctx->wait, &wait,
 					  TASK_INTERRUPTIBLE);
 
-		do {
-			ret = aio_read_evt(ctx, &ent);
-			if (ret)
-				break;
-			if (min_nr <= i)
-				break;
-			if (unlikely(atomic_read(&ctx->dead))) {
-				ret = -EINVAL;
-				break;
-			}
-			if (!t.task)	/* Only check after read evt */
-				break;
-			/* Try to only show up in io wait if there are ops
-			 *  in flight */
-			if (atomic_read(&ctx->reqs_active))
-				io_schedule();
-			else
-				schedule();
-			if (signal_pending(current)) {
-				ret = -EINTR;
-				break;
-			}
-			/*ret = aio_read_evt(ctx, &ent);*/
-		} while (1) ;
+		if (head != info->tail) {
+			__set_current_state(TASK_RUNNING);
+			goto retry;
+		}
 
-		finish_wait(&ctx->wait, &wait);
+		mutex_unlock(&info->ring_lock);
 
-		if (unlikely(ret <= 0))
-			break;
+		/* Try to only show up in io wait if there are ops in flight */
+		if (atomic_read(&ctx->reqs_active))
+			io_schedule();
+		else
+			schedule();
 
-		ret = -EFAULT;
-		if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
-			pr_debug("lost an event due to EFAULT.\n");
-			break;
+		if (signal_pending(current)) {
+			ret = -EINTR;
+			goto out;
 		}
 
-		/* Good, event copied to userland, update counts. */
-		event ++;
-		i ++;
+		__set_current_state(TASK_RUNNING);
+		mutex_lock(&info->ring_lock);
 	}
+
+	mutex_unlock(&info->ring_lock);
 out:
+	finish_wait(&ctx->wait, &wait);
 	hrtimer_cancel(&t.timer);
 	destroy_hrtimer_on_stack(&t.timer);
 	return i ? i : ret;

  parent reply	other threads:[~2012-11-30  0:20 UTC|newest]

Thread overview: 95+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-28 16:43 [PATCH 00/25] AIO performance improvements/cleanups Kent Overstreet
2012-11-28 16:43 ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 01/25] mm: remove old aio use_mm() comment Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 02/25] aio: remove dead code from aio.h Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 03/25] gadget: remove only user of aio retry Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 04/25] aio: remove retry-based AIO Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 05/25] char: add aio_{read,write} to /dev/{null,zero} Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 06/25] aio: Kill return value of aio_complete() Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 07/25] aio: kiocb_cancel() Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-29  0:07   ` Zach Brown
2012-11-29  0:58     ` Kent Overstreet
2012-11-29  0:58       ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 08/25] aio: Move private stuff out of aio.h Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 09/25] aio: dprintk() -> pr_debug() Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 10/25] aio: do fget() after aio_get_req() Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 11/25] aio: Make aio_put_req() lockless Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 12/25] aio: Refcounting cleanup Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-29  0:17   ` Zach Brown
2012-11-29  0:17     ` Zach Brown
2012-11-29  1:12     ` Kent Overstreet
2012-11-29  1:12       ` Kent Overstreet
2012-11-29  0:46   ` Benjamin LaHaise
2012-11-29  0:46     ` Benjamin LaHaise
2012-11-29  1:38     ` Kent Overstreet
2012-11-29  1:38       ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 13/25] aio: Convert read_events() to hrtimers Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-29  0:24   ` Zach Brown
2012-11-29  0:24     ` Zach Brown
2012-11-29  1:05     ` Kent Overstreet
2012-11-29  1:05       ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 14/25] aio: Make aio_read_evt() more efficient Kent Overstreet
2012-11-29  0:38   ` Zach Brown
2012-11-29  0:38     ` Zach Brown
2012-11-29 19:31     ` Kent Overstreet
2012-11-29 19:31       ` Kent Overstreet
2012-11-30  0:20     ` Kent Overstreet [this message]
2012-11-28 16:43 ` [PATCH 15/25] aio: Use cancellation list lazily Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 16/25] aio: Change reqs_active to include unreaped completions Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 17/25] aio: Kill batch allocation Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 18/25] aio: Kill struct aio_ring_info Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 19/25] aio: Give shared kioctx fields their own cachelines Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 20/25] aio: reqs_active -> reqs_available Kent Overstreet
2012-11-28 16:43 ` [PATCH 21/25] aio: percpu reqs_available Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-28 16:43 ` [PATCH 22/25] Generic dynamic per cpu refcounting Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-29 18:45   ` Andi Kleen
2012-11-29 18:45     ` Andi Kleen
2012-11-29 18:57     ` Kent Overstreet
2012-11-29 18:57       ` Kent Overstreet
2012-11-29 18:59       ` Andi Kleen
2012-11-29 19:12         ` Kent Overstreet
2012-11-29 19:12           ` Kent Overstreet
2012-11-29 19:20           ` Andi Kleen
2012-11-29 19:20             ` Andi Kleen
2012-11-29 19:29             ` Kent Overstreet
2012-11-29 19:29               ` Kent Overstreet
2012-11-29 19:34               ` Benjamin LaHaise
2012-11-29 19:34                 ` Benjamin LaHaise
2012-11-29 20:22                 ` Kent Overstreet
2012-11-29 20:42                   ` Andi Kleen
2012-11-29 20:45                     ` Kent Overstreet
2012-11-29 20:45                       ` Kent Overstreet
2012-11-29 20:54                       ` Andi Kleen
2012-11-29 20:54                         ` Andi Kleen
2012-11-29 20:59                         ` Kent Overstreet
2012-11-29 21:57                           ` Jamie Lokier
2012-11-29 21:57                             ` Jamie Lokier
2012-11-28 16:43 ` [PATCH 23/25] aio: Percpu ioctx refcount Kent Overstreet
2012-11-28 16:43 ` [PATCH 24/25] aio: use xchg() instead of completion_lock Kent Overstreet
2012-11-28 16:43 ` [PATCH 25/25] aio: Don't include aio.h in sched.h Kent Overstreet
2012-11-28 16:43   ` Kent Overstreet
2012-11-29  0:03 ` [PATCH 00/25] AIO performance improvements/cleanups Zach Brown
2012-11-29  0:03   ` Zach Brown
2012-11-29 19:01   ` Kent Overstreet
2012-11-29 19:01     ` Kent Overstreet
  -- strict thread matches above, loose matches on Subject: below --
2012-11-28  3:19 Kent Overstreet
2012-11-28  3:19 ` [PATCH 14/25] aio: Make aio_read_evt() more efficient Kent Overstreet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20121130002052.GM15094@google.com \
    --to=koverstreet@google.com \
    --cc=axboe@kernel.dk \
    --cc=bcrl@kvack.org \
    --cc=jmoyer@redhat.com \
    --cc=linux-aio@kvack.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=zab@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.