From: Suparna Bhattacharya <suparna@in.ibm.com>
To: bcrl@redhat.com, akpm@digeo.com
Cc: linux-aio@kvack.org, linux-kernel@vger.kernel.org
Subject: Re: [Patch 1/2] Retry based aio read - core aio changes
Date: Wed, 5 Mar 2003 14:56:33 +0530 [thread overview]
Message-ID: <20030305145633.A1627@in.ibm.com> (raw)
In-Reply-To: <20030305144754.A1600@in.ibm.com>; from suparna@in.ibm.com on Wed, Mar 05, 2003 at 02:47:54PM +0530
On Wed, Mar 05, 2003 at 02:47:54PM +0530, Suparna Bhattacharya wrote:
> For the last few days I've been playing with prototyping
> a particular flavour of a retry based implementation for
> filesystem aio read.
# aioretry.patch : Core aio infrastructure modifications
for high-level retry based aio
Includes couple of fixes in the aio code
-The kiocbClearXXX were doing a set_bit instead of
clear_bit
-Sync iocbs were not woken up when iocb->ki_users = 1
(dio takes a different path for sync and async iocbs,
so maybe that's why we weren't seeing the problem yet)
Regards
Suparna
----------------------------------------------------
diff -ur linux-2.5.62/fs/aio.c linux-2.5.62-aio/fs/aio.c
--- linux-2.5.62/fs/aio.c Tue Feb 18 04:26:14 2003
+++ linux-2.5.62-aio/fs/aio.c Tue Mar 4 19:54:24 2003
@@ -314,14 +314,15 @@
*/
ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
{
- while (iocb->ki_users) {
+ printk("wait_on_sync_iocb\n");
+ while ((iocb->ki_users) && !kiocbIsKicked(iocb)) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!iocb->ki_users)
+ if (!iocb->ki_users || kiocbIsKicked(iocb))
break;
schedule();
}
__set_current_state(TASK_RUNNING);
- return iocb->ki_user_data;
+ return iocb->ki_users ? -EIOCBQUEUED : iocb->ki_user_data;
}
/* exit_aio: called when the last user of mm goes away. At this point,
@@ -395,6 +396,7 @@
req->ki_cancel = NULL;
req->ki_retry = NULL;
req->ki_user_obj = NULL;
+ INIT_LIST_HEAD(&req->ki_run_list);
/* Check if the completion queue has enough free space to
* accept an event from this io.
@@ -558,15 +560,20 @@
enter_lazy_tlb(mm, current, smp_processor_id());
}
-/* Run on kevent's context. FIXME: needs to be per-cpu and warn if an
- * operation blocks.
- */
-static void aio_kick_handler(void *data)
+static inline int __queue_kicked_iocb(struct kiocb *iocb)
{
- struct kioctx *ctx = data;
+ struct kioctx *ctx = iocb->ki_ctx;
- use_mm(ctx->mm);
+ if (list_empty(&iocb->ki_run_list)) {
+ list_add_tail(&iocb->ki_run_list,
+ &ctx->run_list);
+ return 1;
+ }
+ return 0;
+}
+static void aio_run_iocbs(struct kioctx *ctx)
+{
spin_lock_irq(&ctx->ctx_lock);
while (!list_empty(&ctx->run_list)) {
struct kiocb *iocb;
@@ -574,30 +581,67 @@
iocb = list_entry(ctx->run_list.next, struct kiocb,
ki_run_list);
- list_del(&iocb->ki_run_list);
+ list_del_init(&iocb->ki_run_list);
iocb->ki_users ++;
- spin_unlock_irq(&ctx->ctx_lock);
- kiocbClearKicked(iocb);
- ret = iocb->ki_retry(iocb);
- if (-EIOCBQUEUED != ret) {
- aio_complete(iocb, ret, 0);
- iocb = NULL;
+ if (!kiocbTryStart(iocb)) {
+ kiocbClearKicked(iocb);
+ spin_unlock_irq(&ctx->ctx_lock);
+ ret = iocb->ki_retry(iocb);
+ if (-EIOCBQUEUED != ret) {
+ if (list_empty(&iocb->ki_wait.task_list))
+ aio_complete(iocb, ret, 0);
+ else
+ printk("can't delete iocb in use\n");
+ }
+ spin_lock_irq(&ctx->ctx_lock);
+ kiocbClearStarted(iocb);
+ if (kiocbIsKicked(iocb))
+ __queue_kicked_iocb(iocb);
+ } else {
+ printk("iocb already started\n");
}
-
- spin_lock_irq(&ctx->ctx_lock);
if (NULL != iocb)
__aio_put_req(ctx, iocb);
}
spin_unlock_irq(&ctx->ctx_lock);
+}
+
+/* Run on aiod/kevent's context. FIXME: needs to be per-cpu and warn if an
+ * operation blocks.
+ */
+static void aio_kick_handler(void *data)
+{
+ struct kioctx *ctx = data;
+
+ use_mm(ctx->mm);
+ aio_run_iocbs(ctx);
unuse_mm(ctx->mm);
}
-void kick_iocb(struct kiocb *iocb)
+
+void queue_kicked_iocb(struct kiocb *iocb)
{
struct kioctx *ctx = iocb->ki_ctx;
+ unsigned long flags;
+ int run = 0;
+ WARN_ON((!list_empty(&iocb->ki_wait.task_list)));
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ run = __queue_kicked_iocb(iocb);
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+ if (run) {
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+ else
+ queue_work(aio_wq, &ctx->wq);
+ }
+}
+
+void kick_iocb(struct kiocb *iocb)
+{
/* sync iocbs are easy: they can only ever be executing from a
* single context. */
if (is_sync_kiocb(iocb)) {
@@ -606,12 +650,11 @@
return;
}
- if (!kiocbTryKick(iocb)) {
- unsigned long flags;
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_add_tail(&iocb->ki_run_list, &ctx->run_list);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- schedule_work(&ctx->wq);
+
+ if (!kiocbTryKick(iocb) && !kiocbIsStarted(iocb)) {
+ queue_kicked_iocb(iocb);
+ } else {
+ pr_debug("iocb already kicked or in progress\n");
}
}
@@ -642,13 +685,13 @@
iocb->ki_user_data = res;
if (iocb->ki_users == 1) {
iocb->ki_users = 0;
- return 1;
+ ret = 1;
+ } else {
+ spin_lock_irq(&ctx->ctx_lock);
+ iocb->ki_users--;
+ ret = (0 == iocb->ki_users);
+ spin_unlock_irq(&ctx->ctx_lock);
}
- spin_lock_irq(&ctx->ctx_lock);
- iocb->ki_users--;
- ret = (0 == iocb->ki_users);
- spin_unlock_irq(&ctx->ctx_lock);
-
/* sync iocbs put the task here for us */
wake_up_process(iocb->ki_user_obj);
return ret;
@@ -664,6 +707,9 @@
*/
spin_lock_irqsave(&ctx->ctx_lock, flags);
+ if (!list_empty(&iocb->ki_run_list))
+ list_del_init(&iocb->ki_run_list);
+
ring = kmap_atomic(info->ring_pages[0], KM_IRQ1);
tail = info->tail;
@@ -865,6 +911,8 @@
ret = 0;
if (to.timed_out) /* Only check after read evt */
break;
+ /* accelerate kicked iocbs for this ctx */
+ aio_run_iocbs(ctx);
schedule();
if (signal_pending(tsk)) {
ret = -EINTR;
@@ -984,6 +1032,114 @@
return -EINVAL;
}
+/* Called during initial submission and subsequent retry operations */
+long aio_process_iocb(struct kiocb *iocb)
+{
+ struct file *file = iocb->ki_filp;
+ ssize_t ret = 0;
+
+ if (iocb->ki_retried++ > 1024*1024) {
+ printk("Maximal retry count. Bytes done %d\n",
+ iocb->ki_nbytes - iocb->ki_left);
+ return -EAGAIN;
+ }
+
+ if (!(iocb->ki_retried & 0xff)) {
+ printk("%ld aio retries completed %d bytes of %d\n",
+ iocb->ki_retried,
+ iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
+ }
+
+ BUG_ON(current->iocb != NULL);
+
+ current->iocb = iocb;
+
+ switch (iocb->ki_opcode) {
+ case IOCB_CMD_PREAD:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_READ)))
+ goto out;
+ ret = -EFAULT;
+ if (unlikely(!access_ok(VERIFY_WRITE, iocb->ki_buf,
+ iocb->ki_left)))
+ goto out;
+ ret = -EINVAL;
+ if (file->f_op->aio_read)
+ ret = file->f_op->aio_read(iocb, iocb->ki_buf,
+ iocb->ki_left, iocb->ki_pos);
+ break;
+ case IOCB_CMD_PWRITE:
+ ret = -EBADF;
+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
+ goto out;
+ ret = -EFAULT;
+ if (unlikely(!access_ok(VERIFY_READ, iocb->ki_buf,
+ iocb->ki_left)))
+ goto out;
+ ret = -EINVAL;
+ if (file->f_op->aio_write)
+ ret = file->f_op->aio_write(iocb, iocb->ki_buf,
+ iocb->ki_left, iocb->ki_pos);
+ break;
+ case IOCB_CMD_FDSYNC:
+ ret = -EINVAL;
+ if (file->f_op->aio_fsync)
+ ret = file->f_op->aio_fsync(iocb, 1);
+ break;
+ case IOCB_CMD_FSYNC:
+ ret = -EINVAL;
+ if (file->f_op->aio_fsync)
+ ret = file->f_op->aio_fsync(iocb, 0);
+ break;
+ default:
+ dprintk("EINVAL: io_submit: no operation provided\n");
+ ret = -EINVAL;
+ }
+
+ pr_debug("aio_process_iocb: fop ret %d\n", ret);
+ if (likely(-EIOCBQUEUED == ret)) {
+ blk_run_queues();
+ goto out;
+ }
+ if (ret > 0) {
+ iocb->ki_buf += ret;
+ iocb->ki_left -= ret;
+
+ /* Not done yet or a short read, or.. */
+ if (iocb->ki_left
+ /* may have copied out data but not completed writing */
+ || ((iocb->ki_left == 0) &&
+ (iocb->ki_opcode = IOCB_CMD_PWRITE)) ){
+ /* FIXME:Can we find a better way to handle this ? */
+ /* Force an extra retry to determine if we're done */
+ ret = -EIOCBQUEUED;
+ goto out;
+ }
+
+ }
+
+ if (ret >= 0)
+ ret = iocb->ki_nbytes - iocb->ki_left;
+
+out:
+ pr_debug("ki_pos = %llu\n", iocb->ki_pos);
+ current->iocb = NULL;
+ if ((-EIOCBQUEUED == ret) && list_empty(&iocb->ki_wait.task_list)) {
+ kiocbSetKicked(iocb);
+ }
+
+ return ret;
+}
+
+int aio_wake_function(wait_queue_t *wait, unsigned mode, int sync)
+{
+ struct kiocb *iocb = container_of(wait, struct kiocb, ki_wait);
+
+ list_del_init(&wait->task_list);
+ kick_iocb(iocb);
+ return 1;
+}
+
static int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
struct iocb *iocb));
static int io_submit_one(struct kioctx *ctx, struct iocb *user_iocb,
@@ -991,8 +1147,7 @@
{
struct kiocb *req;
struct file *file;
- ssize_t ret;
- char *buf;
+ long ret;
/* enforce forwards compatibility on users */
if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2 ||
@@ -1033,50 +1188,34 @@
req->ki_user_data = iocb->aio_data;
req->ki_pos = iocb->aio_offset;
- buf = (char *)(unsigned long)iocb->aio_buf;
+ req->ki_buf = (char *)(unsigned long)iocb->aio_buf;
+ req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
+ req->ki_opcode = iocb->aio_lio_opcode;
+ req->ki_retry = aio_process_iocb;
+ init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
+ INIT_LIST_HEAD(&req->ki_wait.task_list);
+ req->ki_retried = 0;
+ kiocbSetStarted(req);
- switch (iocb->aio_lio_opcode) {
- case IOCB_CMD_PREAD:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_READ)))
- goto out_put_req;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_WRITE, buf, iocb->aio_nbytes)))
- goto out_put_req;
- ret = -EINVAL;
- if (file->f_op->aio_read)
- ret = file->f_op->aio_read(req, buf,
- iocb->aio_nbytes, req->ki_pos);
- break;
- case IOCB_CMD_PWRITE:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- goto out_put_req;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_READ, buf, iocb->aio_nbytes)))
- goto out_put_req;
- ret = -EINVAL;
- if (file->f_op->aio_write)
- ret = file->f_op->aio_write(req, buf,
- iocb->aio_nbytes, req->ki_pos);
- break;
- case IOCB_CMD_FDSYNC:
- ret = -EINVAL;
- if (file->f_op->aio_fsync)
- ret = file->f_op->aio_fsync(req, 1);
- break;
- case IOCB_CMD_FSYNC:
- ret = -EINVAL;
- if (file->f_op->aio_fsync)
- ret = file->f_op->aio_fsync(req, 0);
- break;
- default:
- dprintk("EINVAL: io_submit: no operation provided\n");
- ret = -EINVAL;
- }
+ ret = aio_process_iocb(req);
+
+ if (likely(-EIOCBQUEUED == ret)) {
+ int run = 0;
+
+ spin_lock_irq(&ctx->ctx_lock);
+ kiocbClearStarted(req);
+ if (kiocbIsKicked(req))
+ run =__queue_kicked_iocb(req);
+ spin_unlock_irq(&ctx->ctx_lock);
+ if (run)
+ queue_work(aio_wq, &ctx->wq);
- if (likely(-EIOCBQUEUED == ret))
return 0;
+ }
+
+ if ((-EBADF == ret) || (-EFAULT == ret))
+ goto out_put_req;
+
aio_complete(req, ret, 0);
return 0;
diff -ur linux-2.5.62/include/linux/aio.h linux-2.5.62-aio/include/linux/aio.h
--- linux-2.5.62/include/linux/aio.h Tue Feb 18 04:25:50 2003
+++ linux-2.5.62-aio/include/linux/aio.h Mon Mar 3 12:17:12 2003
@@ -29,21 +29,26 @@
#define KIF_LOCKED 0
#define KIF_KICKED 1
#define KIF_CANCELLED 2
+#define KIF_STARTED 3
#define kiocbTryLock(iocb) test_and_set_bit(KIF_LOCKED, &(iocb)->ki_flags)
#define kiocbTryKick(iocb) test_and_set_bit(KIF_KICKED, &(iocb)->ki_flags)
+#define kiocbTryStart(iocb) test_and_set_bit(KIF_STARTED, &(iocb)->ki_flags)
#define kiocbSetLocked(iocb) set_bit(KIF_LOCKED, &(iocb)->ki_flags)
#define kiocbSetKicked(iocb) set_bit(KIF_KICKED, &(iocb)->ki_flags)
#define kiocbSetCancelled(iocb) set_bit(KIF_CANCELLED, &(iocb)->ki_flags)
+#define kiocbSetStarted(iocb) set_bit(KIF_STARTED, &(iocb)->ki_flags)
-#define kiocbClearLocked(iocb) set_bit(KIF_LOCKED, &(iocb)->ki_flags)
-#define kiocbClearKicked(iocb) set_bit(KIF_KICKED, &(iocb)->ki_flags)
-#define kiocbClearCancelled(iocb) set_bit(KIF_CANCELLED, &(iocb)->ki_flags)
+#define kiocbClearLocked(iocb) clear_bit(KIF_LOCKED, &(iocb)->ki_flags)
+#define kiocbClearKicked(iocb) clear_bit(KIF_KICKED, &(iocb)->ki_flags)
+#define kiocbClearCancelled(iocb) clear_bit(KIF_CANCELLED, &(iocb)->ki_flags)
+#define kiocbClearStarted(iocb) clear_bit(KIF_STARTED, &(iocb)->ki_flags)
#define kiocbIsLocked(iocb) test_bit(0, &(iocb)->ki_flags)
#define kiocbIsKicked(iocb) test_bit(1, &(iocb)->ki_flags)
#define kiocbIsCancelled(iocb) test_bit(2, &(iocb)->ki_flags)
+#define kiocbIsStarted(iocb) test_bit(3, &(iocb)->ki_flags)
struct kiocb {
struct list_head ki_run_list;
@@ -62,6 +67,14 @@
void *ki_user_obj; /* pointer to userland's iocb */
__u64 ki_user_data; /* user's data for completion */
loff_t ki_pos;
+
+ /* State that we remember to be able to restart/retry */
+ unsigned short ki_opcode;
+ size_t ki_nbytes; /* copy of iocb->aio_nbytes */
+ char *ki_buf; /* remaining iocb->aio_buf */
+ size_t ki_left; /* remaining bytes */
+ wait_queue_t ki_wait;
+ long ki_retried; /* just for testing */
char private[KIOCB_PRIVATE_SIZE];
};
@@ -77,6 +90,8 @@
(x)->ki_ctx = &tsk->active_mm->default_kioctx; \
(x)->ki_cancel = NULL; \
(x)->ki_user_obj = tsk; \
+ (x)->ki_user_data = 0; \
+ init_wait((&(x)->ki_wait)); \
} while (0)
#define AIO_RING_MAGIC 0xa10a10a1
diff -ur linux-2.5.62/include/linux/init_task.h linux-2.5.62-aio/include/linux/init_task.h
--- linux-2.5.62/include/linux/init_task.h Tue Feb 18 04:25:53 2003
+++ linux-2.5.62-aio/include/linux/init_task.h Thu Feb 27 19:01:39 2003
@@ -101,6 +101,7 @@
.alloc_lock = SPIN_LOCK_UNLOCKED, \
.switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
+ .iocb = NULL, \
}
diff -ur linux-2.5.62/include/linux/sched.h linux-2.5.62-aio/include/linux/sched.h
--- linux-2.5.62/include/linux/sched.h Tue Feb 18 04:25:53 2003
+++ linux-2.5.62-aio/include/linux/sched.h Thu Feb 27 19:01:39 2003
@@ -418,6 +418,8 @@
unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
+/* current aio handle */
+ struct kiocb *iocb;
};
extern void __put_task_struct(struct task_struct *tsk);
diff -ur linux-2.5.62/kernel/fork.c linux-2.5.62-aio/kernel/fork.c
--- linux-2.5.62/kernel/fork.c Tue Feb 18 04:25:53 2003
+++ linux-2.5.62-aio/kernel/fork.c Tue Mar 4 14:58:44 2003
@@ -128,6 +128,10 @@
spin_lock_irqsave(&q->lock, flags);
if (list_empty(&wait->task_list))
__add_wait_queue(q, wait);
+ else {
+ if (current->iocb && (wait == ¤t->iocb->ki_wait))
+ printk("prepare_to_wait: iocb->ki_wait in use\n");
+ }
spin_unlock_irqrestore(&q->lock, flags);
}
@@ -834,6 +838,7 @@
p->lock_depth = -1; /* -1 = no lock */
p->start_time = get_jiffies_64();
p->security = NULL;
+ p->iocb = NULL;
retval = -ENOMEM;
if (security_task_alloc(p))
next prev parent reply other threads:[~2003-03-05 9:11 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2003-03-05 9:17 [RFC][Patch] Retry based aio read for filesystems Suparna Bhattacharya
2003-03-05 9:26 ` Suparna Bhattacharya [this message]
2003-03-14 13:23 ` [Patch 1/2] Retry based aio read - core aio changes Suparna Bhattacharya
2003-03-05 9:30 ` [Patch 2/2] Retry based aio read - filesystem read changes Suparna Bhattacharya
2003-03-05 10:42 ` Andrew Morton
2003-03-05 12:14 ` Suparna Bhattacharya
2003-03-31 18:32 ` Janet Morgan
2003-03-31 19:11 ` William Lee Irwin III
2003-03-31 19:16 ` Benjamin LaHaise
2003-03-31 19:07 ` Janet Morgan
2003-04-01 20:24 ` Benjamin LaHaise
2003-03-31 19:17 ` William Lee Irwin III
2003-03-31 19:25 ` Benjamin LaHaise
2003-04-07 3:51 ` Suparna Bhattacharya
2003-03-05 23:00 ` [RFC][Patch] Retry based aio read for filesystems Janet Morgan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20030305145633.A1627@in.ibm.com \
--to=suparna@in.ibm.com \
--cc=akpm@digeo.com \
--cc=bcrl@redhat.com \
--cc=linux-aio@kvack.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.