From: Ming Lei <ming.lei@canonical.com>
To: Jens Axboe <axboe@kernel.dk>,
linux-kernel@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Zach Brown <zab@zabbo.net>, Benjamin LaHaise <bcrl@kvack.org>,
Christoph Hellwig <hch@infradead.org>,
Kent Overstreet <kmo@daterainc.com>,
linux-aio@kvack.org, linux-fsdevel@vger.kernel.org,
Dave Chinner <david@fromorbit.com>,
Ming Lei <ming.lei@canonical.com>
Subject: [PATCH v1 9/9] block: loop: support to submit I/O via kernel aio based
Date: Thu, 14 Aug 2014 23:50:40 +0800 [thread overview]
Message-ID: <1408031441-31156-10-git-send-email-ming.lei@canonical.com> (raw)
In-Reply-To: <1408031441-31156-1-git-send-email-ming.lei@canonical.com>
Part of the patch is based on Dave's previous post.
It is easy to observe that loop block device throughput
can be increased by > 100% in a single-job randread,
libaio engine, direct I/O fio test.
Cc: Zach Brown <zab@zabbo.net>
Cc: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
---
drivers/block/loop.c | 121 ++++++++++++++++++++++++++++++++++++++++-----
drivers/block/loop.h | 1 +
include/uapi/linux/loop.h | 1 +
3 files changed, 112 insertions(+), 11 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 0ce51ee..b57f603 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -76,6 +76,7 @@
#include <linux/miscdevice.h>
#include <linux/falloc.h>
#include <linux/blk-mq.h>
+#include <linux/aio.h>
#include "loop.h"
#include <asm/uaccess.h>
@@ -451,22 +452,99 @@ static int lo_req_flush(struct loop_device *lo, struct request *rq)
return ret;
}
-static int do_req_filebacked(struct loop_device *lo, struct request *rq)
+#ifdef CONFIG_AIO
+static void lo_rw_aio_complete(u64 data, long res)
+{
+ struct loop_cmd *cmd = (struct loop_cmd *)(uintptr_t)data;
+ struct request *rq = cmd->rq;
+
+ if (res > 0)
+ res = 0;
+ else if (res < 0)
+ res = -EIO;
+
+ rq->errors = res;
+ aio_kernel_free(cmd->iocb);
+ blk_mq_complete_request(rq);
+}
+
+static int lo_rw_aio(struct loop_device *lo, struct loop_cmd *cmd,
+ bool write, loff_t pos)
+{
+ struct file *file = lo->lo_backing_file;
+ struct request *rq = cmd->rq;
+ struct kiocb *iocb;
+ unsigned int i = 0;
+ struct iov_iter iter;
+ struct bio_vec *bvec, bv;
+ size_t nr_segs = 0;
+ struct req_iterator r_iter;
+ int ret = -EIO;
+
+ /* how many segments */
+ rq_for_each_segment(bv, rq, r_iter)
+ nr_segs++;
+
+ iocb = aio_kernel_alloc(GFP_NOIO, nr_segs * sizeof(*bvec));
+ if (!iocb) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cmd->iocb = iocb;
+ bvec = (struct bio_vec *)(iocb + 1);
+ rq_for_each_segment(bv, rq, r_iter)
+ bvec[i++] = bv;
+
+ iter.type = ITER_BVEC | (write ? WRITE : 0);
+ iter.bvec = bvec;
+ iter.nr_segs = nr_segs;
+ iter.count = blk_rq_bytes(rq);
+ iter.iov_offset = 0;
+
+ aio_kernel_init_rw(iocb, file, iov_iter_count(&iter), pos,
+ lo_rw_aio_complete, (u64)(uintptr_t)cmd);
+ ret = aio_kernel_submit(iocb, write, &iter);
+ out:
+ return ret;
+}
+#endif /* CONFIG_AIO */
+
+static int lo_io_rw(struct loop_device *lo, struct loop_cmd *cmd,
+ bool write, loff_t pos)
+{
+#ifdef CONFIG_AIO
+ if (lo->lo_flags & LO_FLAGS_USE_AIO)
+ return lo_rw_aio(lo, cmd, write, pos);
+#endif
+ if (write)
+ return lo_send(lo, cmd->rq, pos);
+ else
+ return lo_receive(lo, cmd->rq, lo->lo_blocksize, pos);
+}
+
+static int do_req_filebacked(struct loop_device *lo,
+ struct loop_cmd *cmd, bool *sync)
{
loff_t pos;
int ret;
+ struct request *rq = cmd->rq;
+ *sync = false;
pos = ((loff_t) blk_rq_pos(rq) << 9) + lo->lo_offset;
if (rq->cmd_flags & REQ_WRITE) {
- if (rq->cmd_flags & REQ_FLUSH)
+ if (rq->cmd_flags & REQ_FLUSH) {
ret = lo_req_flush(lo, rq);
- else if (rq->cmd_flags & REQ_DISCARD)
+ *sync = true;
+ } else if (rq->cmd_flags & REQ_DISCARD) {
ret = lo_discard(lo, rq, pos);
- else
- ret = lo_send(lo, rq, pos);
+ *sync = true;
+ } else {
+ ret = lo_io_rw(lo, cmd, true, pos);
+ }
} else
- ret = lo_receive(lo, rq, lo->lo_blocksize, pos);
+ ret = lo_io_rw(lo, cmd, false, pos);
return ret;
}
@@ -771,6 +849,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write)
lo_flags |= LO_FLAGS_READ_ONLY;
+#ifdef CONFIG_AIO
+ if (file->f_op->write_iter && file->f_op->read_iter &&
+ mapping->a_ops->direct_IO) {
+ file->f_flags |= O_DIRECT;
+ lo_flags |= LO_FLAGS_USE_AIO;
+ }
+#endif
+
lo_blocksize = S_ISBLK(inode->i_mode) ?
inode->i_bdev->bd_block_size : PAGE_SIZE;
@@ -804,6 +890,17 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_blocksize(bdev, lo_blocksize);
+#ifdef CONFIG_AIO
+ /*
+ * We must not send too-small direct-io requests, so we reflect
+ * the minimum io size to the loop device's logical block size
+ */
+ if ((lo_flags & LO_FLAGS_USE_AIO) && inode->i_sb->s_bdev)
+ blk_queue_logical_block_size(lo->lo_queue,
+ bdev_io_min(inode->i_sb->s_bdev));
+#endif
+
+
lo->lo_state = Lo_bound;
if (part_shift)
lo->lo_flags |= LO_FLAGS_PARTSCAN;
@@ -1503,19 +1600,21 @@ static void loop_queue_work(struct work_struct *work)
const bool write = cmd->rq->cmd_flags & REQ_WRITE;
struct loop_device *lo = cmd->lo;
int ret = -EIO;
+ bool sync = true;
if (lo->lo_state != Lo_bound)
- goto failed;
+ goto out;
if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY))
- goto failed;
+ goto out;
- ret = do_req_filebacked(lo, cmd->rq);
+ ret = do_req_filebacked(lo, cmd, &sync);
- failed:
+ out:
if (ret)
cmd->rq->errors = -EIO;
- blk_mq_complete_request(cmd->rq);
+ if (!(lo->lo_flags & LO_FLAGS_USE_AIO) || sync || ret)
+ blk_mq_complete_request(cmd->rq);
}
static int loop_init_request(void *data, struct request *rq,
diff --git a/drivers/block/loop.h b/drivers/block/loop.h
index be796c7..4004af5 100644
--- a/drivers/block/loop.h
+++ b/drivers/block/loop.h
@@ -65,6 +65,7 @@ struct loop_cmd {
struct work_struct work;
struct request *rq;
struct loop_device *lo;
+ struct kiocb *iocb;
};
/* Support for loadable transfer modules */
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index e0cecd2..6edc6b6 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -21,6 +21,7 @@ enum {
LO_FLAGS_READ_ONLY = 1,
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
+ LO_FLAGS_USE_AIO = 16,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
--
1.7.9.5
--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org. For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>
next prev parent reply other threads:[~2014-08-14 15:50 UTC|newest]
Thread overview: 37+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-14 15:50 [PATCH v1 0/9] block & aio: kernel aio and loop mq conversion Ming Lei
2014-08-14 15:50 ` [PATCH v1 1/9] aio: add aio_kernel_() interface Ming Lei
2014-08-14 18:07 ` Zach Brown
2014-08-15 13:20 ` Ming Lei
2014-08-14 15:50 ` [PATCH v1 2/9] fd/direct-io: introduce should_dirty for kernel aio Ming Lei
2014-08-14 15:50 ` [PATCH v1 3/9] blk-mq: export blk_mq_freeze_queue and blk_mq_unfreeze_queue Ming Lei
2014-08-14 15:50 ` [PATCH v1 4/9] blk-mq: introduce init_flush_rq_fn callback in 'blk_mq_ops' Ming Lei
2014-08-15 16:19 ` Jens Axboe
2014-08-16 7:49 ` Ming Lei
2014-08-17 18:39 ` Jens Axboe
2014-08-14 15:50 ` [PATCH v1 5/9] block: loop: convert to blk-mq Ming Lei
2014-08-15 16:31 ` Christoph Hellwig
2014-08-15 16:36 ` Jens Axboe
2014-08-15 16:46 ` Jens Axboe
2014-08-16 8:06 ` Ming Lei
2014-08-17 17:48 ` Jens Axboe
2014-08-18 1:22 ` Ming Lei
2014-08-18 11:53 ` Ming Lei
2014-08-19 20:50 ` Jens Axboe
[not found] ` <CACVXFVP_q2MfZtjPAgXrjMJS2K6H2fTFtAe3ZJXBW83uEovqkQ@mail.gmail.com>
2014-08-20 16:09 ` Jens Axboe
[not found] ` <CACVXFVPxXrYi+m0bC7tEcfvDzhQ=Xnapkd+yGRXbKCktgi3Ofw@mail.gmail.com>
2014-08-21 2:58 ` Jens Axboe
[not found] ` <CACVXFVNEuEOXphJK5XGbAGRC9tL7iTv=PE_v+Dnw3CReAEkonw@mail.gmail.com>
2014-08-21 3:16 ` Jens Axboe
[not found] ` <CACVXFVOR0mzMWo+iPtU8jUvYgH+non=hQ0XaP0Z1Fu0qiSbJNA@mail.gmail.com>
2014-08-27 16:08 ` Maxim Patlasov
2014-08-27 16:29 ` Benjamin LaHaise
2014-08-27 17:19 ` Maxim Patlasov
2014-08-27 17:56 ` Zach Brown
2014-08-28 2:10 ` Ming Lei
2014-08-28 2:06 ` Ming Lei
2014-08-29 11:14 ` Maxim Patlasov
2014-08-14 15:50 ` [PATCH v1 6/9] block: loop: say goodby to bio Ming Lei
2014-08-14 15:50 ` [PATCH v1 7/9] block: loop: introduce lo_discard() and lo_req_flush() Ming Lei
2014-08-14 15:50 ` [PATCH v1 8/9] block: loop: don't handle REQ_FUA explicitly Ming Lei
2014-08-14 15:50 ` Ming Lei [this message]
2014-08-14 16:53 ` [PATCH v1 0/9] block & aio: kernel aio and loop mq conversion Jens Axboe
2014-08-15 12:59 ` Ming Lei
2014-08-15 13:11 ` Christoph Hellwig
2014-08-15 14:32 ` Ming Lei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1408031441-31156-10-git-send-email-ming.lei@canonical.com \
--to=ming.lei@canonical.com \
--cc=akpm@linux-foundation.org \
--cc=axboe@kernel.dk \
--cc=bcrl@kvack.org \
--cc=dave.kleikamp@oracle.com \
--cc=david@fromorbit.com \
--cc=hch@infradead.org \
--cc=kmo@daterainc.com \
--cc=linux-aio@kvack.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=zab@zabbo.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).