* [PATCH] aio: using hash table for active requests
@ 2010-12-15 3:03 Li Yu
From: Li Yu @ 2010-12-15 3:03 UTC (permalink / raw)
To: bcrl; +Cc: viro, linux-aio, linux-fsdevel, linux-kernel
This patch removes a TODO in fs/aio.c, namely to use a hash table for active requests.
I prefer to add an iocb at the tail of its collision chain, so I do not use hlist here.
Signed-off-by: Li Yu <raise.sail@gmail.com>
---
fs/aio.c | 90 ++++++++++++++++++++++++++++++++++++++--------------
include/linux/aio.h | 2 -
2 files changed, 68 insertions(+), 24 deletions(-)
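For reference, a minimal userspace sketch of the bucket selection used
below (the multiplier is a simplified stand-in for the kernel's
hash_long(), and AIO_ACTREQ_BUCKETS is hard-coded instead of the real
2*PAGE_SIZE/sizeof(struct list_head)):

	#include <stdio.h>

	#define AIO_ACTREQ_BUCKETS 1024	/* must be a power of 2 */

	/* simplified stand-in for hash_long() */
	static unsigned long bucket_of(const void *user_iocb)
	{
		unsigned long h = (unsigned long)user_iocb * 2654435761UL;

		return h & (AIO_ACTREQ_BUCKETS - 1);	/* mask, not modulo */
	}

	int main(void)
	{
		int dummy;

		printf("bucket: %lu\n", bucket_of(&dummy));
		return 0;
	}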
diff --git a/fs/aio.c b/fs/aio.c
index 8c8f6c5..fee2aa3 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -65,6 +65,15 @@ static DECLARE_WORK(fput_work, aio_fput_routine);
static DEFINE_SPINLOCK(fput_lock);
static LIST_HEAD(fput_head);
+#if BITS_PER_LONG == 64
+#define AIO_ACTREQ_BUCKETS_SHIFT 36
+#elif BITS_PER_LONG == 32
+#define AIO_ACTREQ_BUCKETS_SHIFT 24
+#endif
+
+/* AIO_ACTREQ_BUCKETS must be power of 2 */
+#define AIO_ACTREQ_BUCKETS (2*PAGE_SIZE/sizeof(struct list_head))
+
#define AIO_BATCH_HASH_BITS 3 /* allocated on-stack, so don't go crazy */
#define AIO_BATCH_HASH_SIZE (1 << AIO_BATCH_HASH_BITS)
struct aio_batch_entry {
@@ -212,6 +221,9 @@ static void ctx_rcu_free(struct rcu_head *head)
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
unsigned nr_events = ctx->max_reqs;
+ kfree(ctx->active_reqs_table);
+ ctx->active_reqs_table = NULL;
+
kmem_cache_free(kioctx_cachep, ctx);
if (nr_events) {
@@ -249,6 +261,19 @@ static void __put_ioctx(struct kioctx *ctx)
__put_ioctx(kioctx); \
} while (0)
+static int ioctx_active_reqs_init(struct kioctx *ctx)
+{
+ int i;
+
+ ctx->active_reqs_table = kmalloc(AIO_ACTREQ_BUCKETS*sizeof(struct list_head), GFP_KERNEL);
+ if (!ctx->active_reqs_table)
+ return 1;
+ /* we want to use list_add_tail(), hlist does not provide this API so far ... */
+ for (i = 0; i < AIO_ACTREQ_BUCKETS; ++i)
+ INIT_LIST_HEAD(ctx->active_reqs_table+i);
+ return 0;
+}
+
/* ioctx_alloc
* Allocates and initializes an ioctx. Returns an ERR_PTR if it failed.
*/
@@ -281,7 +306,8 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
spin_lock_init(&ctx->ring_info.ring_lock);
init_waitqueue_head(&ctx->wait);
- INIT_LIST_HEAD(&ctx->active_reqs);
+ if (ioctx_active_reqs_init(ctx))
+ goto out_freectx;
INIT_LIST_HEAD(&ctx->run_list);
INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler);
@@ -331,6 +357,21 @@ out_freectx:
return ctx;
}
+static inline void aio_cancel_one(struct kioctx *ctx, struct kiocb *iocb)
+{
+ int (*cancel)(struct kiocb *, struct io_event *);
+ struct io_event res;
+
+ cancel = iocb->ki_cancel;
+ kiocbSetCancelled(iocb);
+ if (cancel) {
+ iocb->ki_users++;
+ spin_unlock_irq(&ctx->ctx_lock);
+ cancel(iocb, &res);
+ spin_lock_irq(&ctx->ctx_lock);
+ }
+}
+
/* aio_cancel_all
* Cancels all outstanding aio requests on an aio context. Used
* when the processes owning a context have all exited to encourage
@@ -338,22 +379,21 @@ out_freectx:
*/
static void aio_cancel_all(struct kioctx *ctx)
{
- int (*cancel)(struct kiocb *, struct io_event *);
- struct io_event res;
+ int i, cleaned;
+ i = cleaned = 0;
+
spin_lock_irq(&ctx->ctx_lock);
ctx->dead = 1;
- while (!list_empty(&ctx->active_reqs)) {
- struct list_head *pos = ctx->active_reqs.next;
- struct kiocb *iocb = list_kiocb(pos);
- list_del_init(&iocb->ki_list);
- cancel = iocb->ki_cancel;
- kiocbSetCancelled(iocb);
- if (cancel) {
- iocb->ki_users++;
- spin_unlock_irq(&ctx->ctx_lock);
- cancel(iocb, &res);
- spin_lock_irq(&ctx->ctx_lock);
+ for (; i < AIO_ACTREQ_BUCKETS; i++) {
+ while (!list_empty(&ctx->active_reqs_table[i])) {
+ struct list_head *pos = ctx->active_reqs_table[i].next;
+ struct kiocb *iocb = list_kiocb(pos);
+ list_del_init(pos);
+ aio_cancel_one(ctx, iocb);
+ ++cleaned;
}
+ if (cleaned >= ctx->reqs_active)
+ break;
}
spin_unlock_irq(&ctx->ctx_lock);
}
@@ -440,8 +480,9 @@ void exit_aio(struct mm_struct *mm)
* This prevents races between the aio code path referencing the
* req (after submitting it) and aio_complete() freeing the req.
*/
-static struct kiocb *__aio_get_req(struct kioctx *ctx)
+static struct kiocb *__aio_get_req(struct kioctx *ctx, void* tohash)
{
+ unsigned long bucket;
struct kiocb *req = NULL;
struct aio_ring *ring;
int okay = 0;
@@ -465,10 +506,12 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
/* Check if the completion queue has enough free space to
* accept an event from this io.
*/
+ bucket = hash_long((unsigned long)tohash, AIO_ACTREQ_BUCKETS_SHIFT);
+ bucket &= (AIO_ACTREQ_BUCKETS - 1);
spin_lock_irq(&ctx->ctx_lock);
ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
- list_add(&req->ki_list, &ctx->active_reqs);
+ list_add_tail(&req->ki_list, &ctx->active_reqs_table[bucket]);
ctx->reqs_active++;
okay = 1;
}
@@ -483,17 +526,17 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
return req;
}
-static inline struct kiocb *aio_get_req(struct kioctx *ctx)
+static inline struct kiocb *aio_get_req(struct kioctx *ctx, void *iocb)
{
struct kiocb *req;
/* Handle a potential starvation case -- should be exceedingly rare as
* requests will be stuck on fput_head only if the aio_fput_routine is
* delayed and the requests were the last user of the struct file.
*/
- req = __aio_get_req(ctx);
+ req = __aio_get_req(ctx, iocb);
if (unlikely(NULL == req)) {
aio_fput_routine(NULL);
- req = __aio_get_req(ctx);
+ req = __aio_get_req(ctx, iocb);
}
return req;
}
@@ -1605,7 +1648,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!file))
return -EBADF;
- req = aio_get_req(ctx); /* returns with 2 references to req */
+ req = aio_get_req(ctx, user_iocb); /* returns with 2 references to req */
if (unlikely(!req)) {
fput(file);
return -EAGAIN;
@@ -1744,11 +1787,12 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
u32 key)
{
struct list_head *pos;
+ unsigned long bucket;
assert_spin_locked(&ctx->ctx_lock);
-
- /* TODO: use a hash or array, this sucks. */
- list_for_each(pos, &ctx->active_reqs) {
+ bucket = hash_long((unsigned long)iocb, AIO_ACTREQ_BUCKETS_SHIFT);
+ bucket &= (AIO_ACTREQ_BUCKETS - 1);
+ list_for_each(pos, &ctx->active_reqs_table[bucket]) {
struct kiocb *kiocb = list_kiocb(pos);
if (kiocb->ki_obj.user == iocb && kiocb->ki_key == key)
return kiocb;
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 7a8db41..1cf394b 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -189,7 +189,7 @@ struct kioctx {
spinlock_t ctx_lock;
int reqs_active;
- struct list_head active_reqs; /* used for cancellation */
+ struct list_head* active_reqs_table; /* used for cancellation */
struct list_head run_list; /* used for kicked reqs */
/* sys_io_setup currently limits this to an unsigned int */
--
* Re: [PATCH] aio: using hash table for active requests
@ 2010-12-15 22:02 ` Jeff Moyer
From: Jeff Moyer @ 2010-12-15 22:02 UTC (permalink / raw)
To: Li Yu; +Cc: bcrl, viro, linux-aio, linux-fsdevel, linux-kernel
Li Yu <raise.sail@gmail.com> writes:
> This patch removes a TODO in fs/aio.c, namely to use a hash table
> for active requests.
It's remained a TODO item for this long because it hasn't really
mattered in the past. Was there other motivation for this patch besides
the TODO comment?
> I prefer to add an iocb at the tail of its collision chain, so I do
> not use hlist here.
Why do you prefer to add to the tail? It makes more sense to add
collisions at the head, since if you're going to cancel everything, you
have a better chance of cancelling stuff that was queued later than
earlier (assuming you can cancel anything at all, which for most cases,
you can't). Also, you're halving the number of buckets, so you should
have a good reason.
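To make the bucket-count arithmetic concrete, here is a userspace mock
of the two head types (layouts copied from include/linux/list.h and
include/linux/types.h; 4K pages assumed):

	#include <stdio.h>

	struct list_head { struct list_head *next, *prev; };
	struct hlist_node { struct hlist_node *next, **pprev; };
	struct hlist_head { struct hlist_node *first; };

	int main(void)
	{
		size_t table_bytes = 2 * 4096;	/* 2*PAGE_SIZE */

		printf("list_head buckets:  %zu\n",
		       table_bytes / sizeof(struct list_head));
		printf("hlist_head buckets: %zu\n",
		       table_bytes / sizeof(struct hlist_head));
		return 0;
	}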
What sort of testing did you do?
I've made some cursory comments below. I'll more fully review this if
you can provide a bit more justification and some proof of testing. I'd
be really surprised if this helped any real workloads, though.
Cheers,
Jeff
> +static int ioctx_active_reqs_init(struct kioctx *ctx)
> +{
> + int i;
> +
> + ctx->active_reqs_table = kmalloc(AIO_ACTREQ_BUCKETS*sizeof(struct list_head), GFP_KERNEL);
Fit this into 80 columns, please. You may just want to run checkpatch
over the whole thing.
> +static inline void aio_cancel_one(struct kioctx *ctx, struct kiocb *iocb)
I see no good reason to inline this.
> @@ -465,10 +506,12 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
> /* Check if the completion queue has enough free space to
> * accept an event from this io.
> */
> + bucket = hash_long((unsigned long)tohash, AIO_ACTREQ_BUCKETS_SHIFT);
hash_ptr?
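An untested sketch of that suggestion (only the hash_long() line
changes):

	bucket = hash_ptr(tohash, AIO_ACTREQ_BUCKETS_SHIFT);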
> - struct list_head active_reqs; /* used for cancellation */
> + struct list_head* active_reqs_table; /* used for cancellation */
Coding Style
* Re: [PATCH] aio: using hash table for active requests
@ 2010-12-20 1:57 ` Li Yu
From: Li Yu @ 2010-12-20 1:57 UTC (permalink / raw)
To: Jeff Moyer; +Cc: bcrl, viro, linux-aio, linux-fsdevel, linux-kernel
On 2010-12-16 06:02, Jeff Moyer wrote:
> Li Yu <raise.sail@gmail.com> writes:
>
>> This patch removes a TODO in fs/aio.c, namely to use a hash table
>> for active requests.
>
> It's remained a TODO item for this long because it hasn't really
> mattered in the past. Was there other motivation for this patch besides
> the TODO comment?
>
:) Indeed, solving the TODO was my main motivation.
>> I prefer to add an iocb at the tail of its collision chain, so I do
>> not use hlist here.
>
> Why do you prefer to add to the tail? It makes more sense to add
> collisions at the head, since if you're going to cancel everything, you
> have a better chance of cancelling stuff that was queued later than
> earlier (assuming you can cancel anything at all, which for most cases,
> you can't). Also, you're halving the number of buckets, so you should
> have a good reason.
>
I misunderstood the main use of this code ... it can only speed up the io_cancel() syscall a bit.
However, the "add to tail" design is wrong here: I had pictured each hash bucket as a FIFO queue, in which adding an element at the tail would reduce dequeue time.
> What sort of testing did you do?
>
> I've made some cursory comments below. I'll more fully review this if
> you can provide a bit more justification and some proof of testing. I'd
> be really surprised if this helped any real workloads, though.
Yes, I did some testing of this patch; its performance impact seems negligible. I tested with fio, iodepth=65535, size=1024m.
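For reference, the job was roughly like the sketch below; only iodepth
and size are from my actual runs, while the engine, I/O pattern, and
file name are illustrative assumptions:

	# only iodepth and size match the runs described above;
	# the rest is an assumed configuration
	[aio-active-reqs]
	ioengine=libaio
	iodepth=65535
	size=1024m
	rw=randread
	direct=1
	filename=/tmp/fio-testfile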
Thanks for your review time!
Yu
>
> Cheers,
> Jeff
>
>> +static int ioctx_active_reqs_init(struct kioctx *ctx)
>> +{
>> + int i;
>> +
>> + ctx->active_reqs_table = kmalloc(AIO_ACTREQ_BUCKETS*sizeof(struct list_head), GFP_KERNEL);
>
> Fit this into 80 columns, please. You may just want to run checkpatch
> over the whole thing.
>
>> +static inline void aio_cancel_one(struct kioctx *ctx, struct kiocb *iocb)
>
> I see no good reason to inline this.
>
>> @@ -465,10 +506,12 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
>> /* Check if the completion queue has enough free space to
>> * accept an event from this io.
>> */
>> + bucket = hash_long((unsigned long)tohash, AIO_ACTREQ_BUCKETS_SHIFT);
>
> hash_ptr?
>
>> - struct list_head active_reqs; /* used for cancellation */
>> + struct list_head* active_reqs_table; /* used for cancellation */
>
> Coding Style
>