public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: kenchen@google.com (Ken Chen)
To: akpm@linux-foundation.org, linux-aio@kvack.org,
	linux-kernel@vger.kernel.org
Subject: [patch] convert aio event reap to use atomic-op instead of spin_lock
Date: Tue, 10 Apr 2007 16:53:53 -0700 (PDT)	[thread overview]
Message-ID: <20070410235353.325A7346F64@localhost> (raw)

Resurrect an old patch that uses atomic operation to update ring buffer
index on AIO event queue.  This work allows futher application/libaio
optimization to run fast path io_getevents in user space.

I've also added one more change on top of old implementation that rounds
ring buffer size to power of 2 to allow fast head/tail calculation. With
the new scheme, there is no more modulo operation on them and we simply
increment either pointer index directly.  This scheme also automatically
handles integer wrap nicely without any additional special treatment.


Signed-off-by: Ken Chen <kenchen@google.com>


diff --git a/fs/aio.c b/fs/aio.c
index e4598d6..00ecd14 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -108,8 +108,8 @@ static int aio_setup_ring(struct kioctx 
 	unsigned long size;
 	int nr_pages;
 
-	/* Compensate for the ring buffer's head/tail overlap entry */
-	nr_events += 2;	/* 1 is required, 2 for good luck */
+	/* round nr_event to next power of 2 */
+	nr_events = roundup_pow_of_two(nr_events);
 
 	size = sizeof(struct aio_ring);
 	size += sizeof(struct io_event) * nr_events;
@@ -118,8 +118,6 @@ static int aio_setup_ring(struct kioctx 
 	if (nr_pages < 0)
 		return -EINVAL;
 
-	nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
-
 	info->nr = 0;
 	info->ring_pages = info->internal_pages;
 	if (nr_pages > AIO_RING_PAGES) {
@@ -177,8 +175,8 @@ #define AIO_EVENTS_PER_PAGE	(PAGE_SIZE /
 #define AIO_EVENTS_FIRST_PAGE	((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
 #define AIO_EVENTS_OFFSET	(AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
 
-#define aio_ring_event(info, nr, km) ({					\
-	unsigned pos = (nr) + AIO_EVENTS_OFFSET;			\
+#define aio_ring_event(info, __nr, km) ({				\
+	unsigned pos = ((__nr) & ((info)->nr - 1)) + AIO_EVENTS_OFFSET;	\
 	struct io_event *__event;					\
 	__event = kmap_atomic(						\
 			(info)->ring_pages[pos / AIO_EVENTS_PER_PAGE], km); \
@@ -220,7 +218,6 @@ static struct kioctx *ioctx_alloc(unsign
 
 	atomic_set(&ctx->users, 1);
 	spin_lock_init(&ctx->ctx_lock);
-	spin_lock_init(&ctx->ring_info.ring_lock);
 	init_waitqueue_head(&ctx->wait);
 
 	INIT_LIST_HEAD(&ctx->active_reqs);
@@ -927,7 +924,7 @@ int fastcall aio_complete(struct kiocb *
 	struct aio_ring	*ring;
 	struct io_event	*event;
 	unsigned long	flags;
-	unsigned long	tail;
+	unsigned	tail;
 	int		ret;
 
 	/*
@@ -969,8 +966,6 @@ int fastcall aio_complete(struct kiocb *
 
 	tail = info->tail;
 	event = aio_ring_event(info, tail, KM_IRQ0);
-	if (++tail >= info->nr)
-		tail = 0;
 
 	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
 	event->data = iocb->ki_user_data;
@@ -986,13 +981,14 @@ int fastcall aio_complete(struct kiocb *
 	 */
 	smp_wmb();	/* make event visible before updating tail */
 
+	tail++;
 	info->tail = tail;
 	ring->tail = tail;
 
 	put_aio_ring_event(event, KM_IRQ0);
 	kunmap_atomic(ring, KM_IRQ1);
 
-	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
+	pr_debug("added to ring %p at [%u]\n", iocb, tail);
 put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
@@ -1007,42 +1003,36 @@ put_rq:
 /* aio_read_evt
  *	Pull an event off of the ioctx's event ring.  Returns the number of 
  *	events fetched (0 or 1 ;-)
- *	FIXME: make this use cmpxchg.
  *	TODO: make the ringbuffer user mmap()able (requires FIXME).
  */
 static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
 {
 	struct aio_ring_info *info = &ioctx->ring_info;
 	struct aio_ring *ring;
-	unsigned long head;
-	int ret = 0;
+	struct io_event *evp;
+	unsigned head;
+	int ret;
 
 	ring = kmap_atomic(info->ring_pages[0], KM_USER0);
-	dprintk("in aio_read_evt h%lu t%lu m%lu\n",
-		 (unsigned long)ring->head, (unsigned long)ring->tail,
-		 (unsigned long)ring->nr);
-
-	if (ring->head == ring->tail)
-		goto out;
-
-	spin_lock(&info->ring_lock);
+	dprintk("in aio_read_evt h%u t%u m%u\n",
+		 ring->head, ring->tail, ring->nr);
 
-	head = ring->head % info->nr;
-	if (head != ring->tail) {
-		struct io_event *evp = aio_ring_event(info, head, KM_USER1);
+	do {
+		head = ring->head;
+		if (head == ring->tail) {
+			ret = 0;
+			break;
+		}
+		evp = aio_ring_event(info, head, KM_USER1);
 		*ent = *evp;
-		head = (head + 1) % info->nr;
 		smp_mb(); /* finish reading the event before updatng the head */
-		ring->head = head;
 		ret = 1;
 		put_aio_ring_event(evp, KM_USER1);
-	}
-	spin_unlock(&info->ring_lock);
+	} while (head != cmpxchg(&ring->head, head, head + 1));
 
-out:
 	kunmap_atomic(ring, KM_USER0);
-	dprintk("leaving aio_read_evt: %d  h%lu t%lu\n", ret,
-		 (unsigned long)ring->head, (unsigned long)ring->tail);
+	dprintk("leaving aio_read_evt: %d  h%u t%u\n", ret,
+		 ring->head, ring->tail);
 	return ret;
 }
 
diff --git a/include/linux/aio.h b/include/linux/aio.h
index a30ef13..42ca8d3 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -152,11 +152,10 @@ struct aio_ring {
 	unsigned	incompat_features;
 	unsigned	header_length;	/* size of aio_ring */
 
-
-	struct io_event		io_events[0];
+	struct io_event	io_events[0];
 }; /* 128 bytes + ring size */
 
-#define aio_ring_avail(info, ring)	(((ring)->head + (info)->nr - 1 - (ring)->tail) % (info)->nr)
+#define aio_ring_avail(info, ring) ((info)->nr + (ring)->head - (ring)->tail)
 
 #define AIO_RING_PAGES	8
 struct aio_ring_info {
@@ -164,7 +163,6 @@ struct aio_ring_info {
 	unsigned long		mmap_size;
 
 	struct page		**ring_pages;
-	spinlock_t		ring_lock;
 	long			nr_pages;
 
 	unsigned		nr, tail;


             reply	other threads:[~2007-04-10 23:54 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-10 23:53 Ken Chen [this message]
2007-04-11  5:18 ` [patch] convert aio event reap to use atomic-op instead of spin_lock Andrew Morton
2007-04-11 16:54   ` Ken Chen
2007-04-11 18:15     ` Zach Brown
2007-04-11 18:29       ` Ken Chen
2007-04-11 18:00 ` Zach Brown
2007-04-11 19:11   ` Andrew Morton
2007-04-11 19:34     ` Zach Brown
2007-04-11 19:28   ` Ken Chen
2007-04-11 19:44     ` Zach Brown
2007-04-11 19:45     ` Benjamin LaHaise
2007-04-11 19:52       ` Zach Brown
2007-04-11 20:03         ` Benjamin LaHaise
2007-04-12  7:29     ` Ken Chen
2007-04-12 12:06       ` Jeff Moyer
2007-04-13  0:57         ` Ken Chen
2007-04-13  1:08           ` Ken Chen
2007-04-12  7:50   ` Ken Chen
2007-04-12 14:31     ` Benjamin LaHaise
2007-04-12 15:13       ` Benjamin LaHaise

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070410235353.325A7346F64@localhost \
    --to=kenchen@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-aio@kvack.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox