From: Zach Brown <zach.brown@oracle.com>
To: linux-fsdevel@vger.kernel.org
Subject: [PATCH 3/8] aio: add an interface to submit aio from the kernel
Date: Thu, 22 Oct 2009 13:25:52 -0700 [thread overview]
Message-ID: <1256243157-16667-4-git-send-email-zach.brown@oracle.com> (raw)
In-Reply-To: <1256243157-16667-3-git-send-email-zach.brown@oracle.com>
This adds a simple interface that lets other parts of the kernel submit aio
iocbs. Callers provide a function which is called as the IO completes.
To reduce overhead, these iocbs aren't tracked: they can't be canceled, callers
limit the number in flight themselves, and previous patches in this series
removed retry-based aio.
Signed-off-by: Zach Brown <zach.brown@oracle.com>
---
fs/aio.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/aio.h | 11 ++++++
2 files changed, 97 insertions(+), 0 deletions(-)
diff --git a/fs/aio.c b/fs/aio.c
index 2406981..7a150c2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -843,6 +843,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2)
iocb->ki_users = 0;
wake_up_process(iocb->ki_obj.tsk);
return 1;
+ } else if (is_kernel_kiocb(iocb)) {
+ iocb->ki_obj.complete(iocb->ki_user_data, res);
+ aio_kernel_free(iocb);
+ return 0;
}
info = &ctx->ring_info;
@@ -1706,3 +1710,85 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout);
return ret;
}
+
+/*
+ * This allocates an iocb that will be used to submit and track completion of
+ * an IO that is issued from kernel space. We don't have a context, we don't
+ * limit the number pending, and we can't be canceled. The caller is expected
+ * to call the appropriate aio_kernel_init_*() functions and then call
+ * aio_kernel_submit(). From that point forward progress is guaranteed by the
+ * file system aio method. Eventually the caller's completion callback will be
+ * called.
+ */
+struct kiocb *aio_kernel_alloc(gfp_t gfp)
+{
+ struct kiocb *iocb = kmem_cache_zalloc(kiocb_cachep, gfp);
+ if (iocb)
+ iocb->ki_key = KIOCB_KERNEL_KEY;
+ return iocb;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_alloc);
+
+void aio_kernel_free(struct kiocb *iocb)
+{
+ if (iocb)
+ kmem_cache_free(kiocb_cachep, iocb);
+}
+EXPORT_SYMBOL_GPL(aio_kernel_free);
+
+/*
+ * ptr and nr can be a buffer and byte count or an iovec and segment count.
+ */
+void aio_kernel_init_rw(struct kiocb *iocb, struct file *filp,
+ unsigned short op, void *ptr, size_t nr, loff_t off)
+{
+ iocb->ki_filp = filp;
+ iocb->ki_opcode = op;
+ iocb->ki_buf = (char __user *)(unsigned long)ptr;
+ iocb->ki_left = nr;
+ iocb->ki_nbytes = nr;
+ iocb->ki_pos = off;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_init_rw);
+
+void aio_kernel_init_callback(struct kiocb *iocb,
+ void (*complete)(u64 user_data, long res),
+ u64 user_data)
+{
+ iocb->ki_obj.complete = complete;
+ iocb->ki_user_data = user_data;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_init_callback);
+
+/*
+ * The iocb is our responsibility once this is called. The caller must not
+ * reference it. This comes from aio_setup_iocb() modifying the iocb.
+ *
+ * Callers must be prepared for their iocb completion callback to be called the
+ * moment they enter this function. The completion callback may be called from
+ * any context.
+ *
+ * Returns: 0: the iocb completion callback will be called with the op result
+ * negative errno: the operation was not submitted and the iocb was freed
+ */
+int aio_kernel_submit(struct kiocb *iocb)
+{
+ int ret;
+
+ BUG_ON(!is_kernel_kiocb(iocb));
+ BUG_ON(!iocb->ki_obj.complete);
+ BUG_ON(!iocb->ki_filp);
+
+ ret = aio_setup_iocb(iocb);
+ if (ret) {
+ aio_kernel_free(iocb);
+ return ret;
+ }
+
+ ret = iocb->ki_retry(iocb);
+ if (ret != -EIOCBQUEUED)
+ aio_complete(iocb, ret, 0);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(aio_kernel_submit);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 4f88ec2..95ef1ea 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -24,6 +24,7 @@ struct kioctx;
#define KIOCB_C_COMPLETE 0x02
#define KIOCB_SYNC_KEY (~0U)
+#define KIOCB_KERNEL_KEY (~1U)
/* ki_flags bits */
/*
@@ -90,6 +91,7 @@ struct kiocb {
union {
void __user *user;
struct task_struct *tsk;
+ void (*complete)(u64 user_data, long res);
} ki_obj;
__u64 ki_user_data; /* user's data for completion */
@@ -118,6 +120,7 @@ struct kiocb {
};
#define is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY)
+#define is_kernel_kiocb(iocb) ((iocb)->ki_key == KIOCB_KERNEL_KEY)
#define init_sync_kiocb(x, filp) \
do { \
struct task_struct *tsk = current; \
@@ -204,6 +207,14 @@ extern int aio_put_req(struct kiocb *iocb);
extern int aio_complete(struct kiocb *iocb, long res, long res2);
struct mm_struct;
extern void exit_aio(struct mm_struct *mm);
+struct kiocb *aio_kernel_alloc(gfp_t gfp);
+void aio_kernel_free(struct kiocb *iocb);
+void aio_kernel_init_rw(struct kiocb *iocb, struct file *filp,
+ unsigned short op, void *ptr, size_t nr, loff_t off);
+void aio_kernel_init_callback(struct kiocb *iocb,
+ void (*complete)(u64 user_data, long res),
+ u64 user_data);
+int aio_kernel_submit(struct kiocb *iocb);
#else
static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
static inline int aio_put_req(struct kiocb *iocb) { return 0; }
--
1.6.2.5
next prev parent reply other threads:[~2009-10-22 20:47 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-10-22 20:25 [RFC] loop: issue aio with pages Zach Brown
2009-10-22 20:25 ` [PATCH 1/8] gadgetfs: use schedule_work() instead of EIOCBRETRY Zach Brown
2009-10-22 20:25 ` [PATCH 2/8] aio: disable retry Zach Brown
2009-10-22 20:25 ` Zach Brown [this message]
2009-10-22 20:25 ` [PATCH 4/8] aio: add aio_read_pages and aio_write_pages Zach Brown
2009-10-22 20:25 ` [PATCH 5/8] dio: refactor __blockdev_direct_IO() Zach Brown
2009-10-22 20:25 ` [PATCH 6/8] dio: add an entry point which takes pages Zach Brown
2009-10-22 20:25 ` [PATCH 7/8] block: provide aio_read_pages and aio_write_pages Zach Brown
2009-10-22 20:25 ` [PATCH 8/8] loop: use aio to perform io on the underlying file Zach Brown
2009-10-27 16:01 ` Jeff Moyer
2009-10-27 15:49 ` [PATCH 6/8] dio: add an entry point which takes pages Jeff Moyer
2009-10-27 17:50 ` Zach Brown
2009-10-27 15:39 ` [PATCH 5/8] dio: refactor __blockdev_direct_IO() Jeff Moyer
2009-10-26 16:17 ` [PATCH 4/8] aio: add aio_read_pages and aio_write_pages Jeff Moyer
2009-10-26 17:08 ` Jeff Moyer
2009-10-26 22:22 ` Zach Brown
2009-10-26 16:10 ` [PATCH 3/8] aio: add an interface to submit aio from the kernel Jeff Moyer
2009-10-26 22:21 ` Zach Brown
2009-10-25 7:37 ` [PATCH 2/8] aio: disable retry Christoph Hellwig
2009-10-26 22:15 ` Zach Brown
2009-10-26 16:00 ` Jeff Moyer
2009-10-26 15:57 ` [PATCH 1/8] gadgetfs: use schedule_work() instead of EIOCBRETRY Jeff Moyer
2009-10-25 7:36 ` [RFC] loop: issue aio with pages Christoph Hellwig
2009-10-26 22:13 ` Zach Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1256243157-16667-4-git-send-email-zach.brown@oracle.com \
--to=zach.brown@oracle.com \
--cc=linux-fsdevel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).