From: Dave Kleikamp <dave.kleikamp@oracle.com>
To: linux-kernel@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org,
Andrew Morton <akpm@linux-foundation.org>,
"Maxim V. Patlasov" <mpatlasov@parallels.com>,
Zach Brown <zab@zabbo.net>, Christoph Hellwig <hch@infradead.org>
Subject: [PATCH V9.1 17/33] loop: use aio to perform io on the underlying file
Date: Fri, 18 Oct 2013 12:55:06 -0500 [thread overview]
Message-ID: <526175FA.5050501@oracle.com> (raw)
In-Reply-To: <1381932286-14978-18-git-send-email-dave.kleikamp@oracle.com>
[I made a mistake in my last change to this patch: I misunderstood
what bdev_io_min() is for. I need to stick with
logical_block_size.]
This uses the new kernel aio interface to process loopback IO by
submitting concurrent direct aio. Previously loop's IO was serialized
by synchronous processing in a thread.
The aio operations describe the memory for the IO directly with the
bio's bio_vec array rather than through mappings of the pages.
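The use of aio operations is enabled when the kernel is built with
CONFIG_AIO and the backing file supports the read_iter, write_iter and
direct_IO methods. Even then, a bio is submitted through aio only while
the loop device uses transfer_none; otherwise it takes the existing
synchronous path.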
Signed-off-by: Zach Brown <zab@zabbo.net>
Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
---
drivers/block/loop.c | 158 ++++++++++++++++++++++++++++++++++------------
include/uapi/linux/loop.h | 1 +
2 files changed, 119 insertions(+), 40 deletions(-)
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 40e7155..e564769 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -75,6 +75,7 @@
#include <linux/sysfs.h>
#include <linux/miscdevice.h>
#include <linux/falloc.h>
+#include <linux/aio.h>
#include "loop.h"
#include <asm/uaccess.h>
@@ -218,6 +219,48 @@ lo_do_transfer(struct loop_device *lo, int cmd,
return lo->transfer(lo, cmd, rpage, roffs, lpage, loffs, size, rblock);
}
+#ifdef CONFIG_AIO
+static void lo_rw_aio_complete(u64 data, long res)
+{
+ struct bio *bio = (struct bio *)(uintptr_t)data;
+
+ if (res > 0)
+ res = 0;
+ else if (res < 0)
+ res = -EIO;
+
+ bio_endio(bio, res);
+}
+
+static int lo_rw_aio(struct loop_device *lo, struct bio *bio)
+{
+ struct file *file = lo->lo_backing_file;
+ struct kiocb *iocb;
+ unsigned int op;
+ struct iov_iter iter;
+ struct bio_vec *bvec;
+ size_t nr_segs;
+ loff_t pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
+
+ iocb = aio_kernel_alloc(GFP_NOIO);
+ if (!iocb)
+ return -ENOMEM;
+
+ if (bio_rw(bio) & WRITE)
+ op = IOCB_CMD_WRITE_ITER;
+ else
+ op = IOCB_CMD_READ_ITER;
+
+ bvec = bio_iovec_idx(bio, bio->bi_idx);
+ nr_segs = bio_segments(bio);
+ iov_iter_init_bvec(&iter, bvec, nr_segs, bvec_length(bvec, nr_segs), 0);
+ aio_kernel_init_rw(iocb, file, iov_iter_count(&iter), pos);
+ aio_kernel_init_callback(iocb, lo_rw_aio_complete, (u64)(uintptr_t)bio);
+
+ return aio_kernel_submit(iocb, op, &iter);
+}
+#endif /* CONFIG_AIO */
+
/**
* __do_lo_send_write - helper for writing data to a loop device
*
@@ -418,50 +461,33 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
if (bio_rw(bio) == WRITE) {
- struct file *file = lo->lo_backing_file;
-
- if (bio->bi_rw & REQ_FLUSH) {
- ret = vfs_fsync(file, 0);
- if (unlikely(ret && ret != -EINVAL)) {
- ret = -EIO;
- goto out;
- }
- }
+ ret = lo_send(lo, bio, pos);
+ } else
+ ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
- /*
- * We use punch hole to reclaim the free space used by the
- * image a.k.a. discard. However we do not support discard if
- * encryption is enabled, because it may give an attacker
- * useful information.
- */
- if (bio->bi_rw & REQ_DISCARD) {
- struct file *file = lo->lo_backing_file;
- int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+ return ret;
+}
- if ((!file->f_op->fallocate) ||
- lo->lo_encrypt_key_size) {
- ret = -EOPNOTSUPP;
- goto out;
- }
- ret = file->f_op->fallocate(file, mode, pos,
- bio->bi_size);
- if (unlikely(ret && ret != -EINVAL &&
- ret != -EOPNOTSUPP))
- ret = -EIO;
- goto out;
- }
+static int lo_discard(struct loop_device *lo, struct bio *bio)
+{
+ struct file *file = lo->lo_backing_file;
+ int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+ loff_t pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
+ int ret;
- ret = lo_send(lo, bio, pos);
+ /*
+ * We use punch hole to reclaim the free space used by the
+ * image a.k.a. discard. However we do not support discard if
+ * encryption is enabled, because it may give an attacker
+ * useful information.
+ */
- if ((bio->bi_rw & REQ_FUA) && !ret) {
- ret = vfs_fsync(file, 0);
- if (unlikely(ret && ret != -EINVAL))
- ret = -EIO;
- }
- } else
- ret = lo_receive(lo, bio, lo->lo_blocksize, pos);
+ if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size)
+ return -EOPNOTSUPP;
-out:
+ ret = file->f_op->fallocate(file, mode, pos, bio->bi_size);
+ if (unlikely(ret && ret != -EINVAL && ret != -EOPNOTSUPP))
+ ret = -EIO;
return ret;
}
@@ -525,7 +551,35 @@ static inline void loop_handle_bio(struct loop_device *lo, struct bio *bio)
do_loop_switch(lo, bio->bi_private);
bio_put(bio);
} else {
- int ret = do_bio_filebacked(lo, bio);
+ int ret;
+
+ if (bio_rw(bio) == WRITE) {
+ if (bio->bi_rw & REQ_FLUSH) {
+ ret = vfs_fsync(lo->lo_backing_file, 1);
+ if (unlikely(ret && ret != -EINVAL))
+ goto out;
+ }
+ if (bio->bi_rw & REQ_DISCARD) {
+ ret = lo_discard(lo, bio);
+ goto out;
+ }
+ }
+#ifdef CONFIG_AIO
+ if (lo->lo_flags & LO_FLAGS_USE_AIO &&
+ lo->transfer == transfer_none) {
+ ret = lo_rw_aio(lo, bio);
+ if (ret == 0)
+ return;
+ } else
+#endif
+ ret = do_bio_filebacked(lo, bio);
+
+ if ((bio_rw(bio) == WRITE) && bio->bi_rw & REQ_FUA && !ret) {
+ ret = vfs_fsync(lo->lo_backing_file, 0);
+ if (unlikely(ret && ret != -EINVAL))
+ ret = -EIO;
+ }
+out:
bio_endio(bio, ret);
}
}
@@ -547,6 +601,12 @@ static int loop_thread(void *data)
struct loop_device *lo = data;
struct bio *bio;
+ /*
+ * In cases where the underlying filesystem calls balance_dirty_pages(),
+ * we want less throttling to avoid lockups when writing dirty
+ * pages through the loop device.
+ */
+ current->flags |= PF_LESS_THROTTLE;
set_user_nice(current, -20);
while (!kthread_should_stop() || !bio_list_empty(&lo->lo_bio_list)) {
@@ -869,6 +929,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
!file->f_op->write)
lo_flags |= LO_FLAGS_READ_ONLY;
+#ifdef CONFIG_AIO
+ if (file->f_op->write_iter && file->f_op->read_iter &&
+ mapping->a_ops->direct_IO) {
+ file->f_flags |= O_DIRECT;
+ lo_flags |= LO_FLAGS_USE_AIO;
+ }
+#endif
+
lo_blocksize = S_ISBLK(inode->i_mode) ?
inode->i_bdev->bd_block_size : PAGE_SIZE;
@@ -912,6 +980,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
set_blocksize(bdev, lo_blocksize);
+#ifdef CONFIG_AIO
+ /*
+ * Direct I/O requests must not be smaller than the backing device's
+ * logical block size, so inherit that size for the loop queue.
+ */
+ if ((lo_flags & LO_FLAGS_USE_AIO) && inode->i_sb->s_bdev)
+ blk_queue_logical_block_size(lo->lo_queue,
+ bdev_logical_block_size(inode->i_sb->s_bdev));
+#endif
+
lo->lo_thread = kthread_create(loop_thread, lo, "loop%d",
lo->lo_number);
if (IS_ERR(lo->lo_thread)) {
diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h
index e0cecd2..6edc6b6 100644
--- a/include/uapi/linux/loop.h
+++ b/include/uapi/linux/loop.h
@@ -21,6 +21,7 @@ enum {
LO_FLAGS_READ_ONLY = 1,
LO_FLAGS_AUTOCLEAR = 4,
LO_FLAGS_PARTSCAN = 8,
+ LO_FLAGS_USE_AIO = 16,
};
#include <asm/posix_types.h> /* for __kernel_old_dev_t */
--
1.8.4.1
next prev parent reply other threads:[~2013-10-18 17:55 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-16 14:04 [PATCH V9 00/33] loop: Issue O_DIRECT aio using bio_vec Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 01/33] iov_iter: move into its own file Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 02/33] iov_iter: iov_iter_copy_from_user() should use non-atomic copy Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 03/33] iov_iter: add copy_to_user support Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 04/33] iov_iter: add __iovec_copy_to_user() Dave Kleikamp
[not found] ` <1381932286-14978-1-git-send-email-dave.kleikamp-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2013-10-16 14:04 ` [PATCH V9 05/33] fuse: convert fuse to use iov_iter_copy_[to|from]_user Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 06/33] iov_iter: hide iovec details behind ops function pointers Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 07/33] iov_iter: ii_iovec_copy_to_user should pre-fault user pages Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 08/33] iov_iter: add bvec support Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 09/33] iov_iter: add a shorten call Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 10/33] iov_iter: let callers extract iovecs and bio_vecs Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 11/33] dio: Convert direct_IO to use iov_iter Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 12/33] dio: add bio_vec support to __blockdev_direct_IO() Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 13/33] fs: pull iov_iter use higher up the stack Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 14/33] aio: add aio_kernel_() interface Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 15/33] aio: add aio support for iov_iter arguments Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 16/33] bio: add bvec_length(), like iov_length() Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 17/33] loop: use aio to perform io on the underlying file Dave Kleikamp
2013-10-18 17:55 ` Dave Kleikamp [this message]
2013-10-16 14:04 ` [PATCH V9 18/33] fs: create file_readable() and file_writable() functions Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 19/33] fs: use read_iter and write_iter rather than aio_read and aio_write Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 20/33] fs: add read_iter and write_iter to several file systems Dave Kleikamp
2013-10-16 14:04 ` [Ocfs2-devel] [PATCH V9 21/33] ocfs2: add support for read_iter and write_iter Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 22/33] ext4: " Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 23/33] nfs: add support for read_iter, write_iter Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 24/33] nfs: simplify swap Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 25/33] btrfs: add support for read_iter and write_iter Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 26/33] block_dev: add support for read_iter, write_iter Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 27/33] xfs: add support for read_iter and write_iter Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [Cluster-devel] [PATCH V9 28/33] gfs2: Convert aio_read/write ops to read/write_iter Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 29/33] udf: convert file ops from aio_read/write " Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 30/33] afs: add support for read_iter and write_iter Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 31/33] ecrpytfs: Convert aio_read/write ops to read/write_iter Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 32/33] ubifs: convert file ops from aio_read/write " Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` Dave Kleikamp
2013-10-16 14:04 ` [PATCH V9 33/33] tmpfs: add support for read_iter and write_iter Dave Kleikamp
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=526175FA.5050501@oracle.com \
--to=dave.kleikamp@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=hch@infradead.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mpatlasov@parallels.com \
--cc=zab@zabbo.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.