linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dave Kleikamp <dave.kleikamp@oracle.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Zach Brown <zab@zabbo.net>,
	Dave Kleikamp <dave.kleikamp@oracle.com>
Subject: [RFC PATCH 13/22] dio: add __blockdev_direct_IO_bvec()
Date: Mon, 27 Feb 2012 15:19:27 -0600	[thread overview]
Message-ID: <1330377576-3659-14-git-send-email-dave.kleikamp@oracle.com> (raw)
In-Reply-To: <1330377576-3659-1-git-send-email-dave.kleikamp@oracle.com>

From: Zach Brown <zab@zabbo.net>

Previous patches refactored __blockdev_direct_IO() to call helper
functions while iterating over the user's iovec.  This adds
__blockdev_direct_IO_bvec(), which is the same except that it iterates
over the pages in a bio_vec array instead of user addresses in an iovec.

The trick here is to initialize the dio state so that do_direct_IO()
consumes the pages we provide and never tries to map user pages.  This
is done by setting final_block_in_request, for each bio_vec entry, so
that it covers only the single page we place in the dio.  do_direct_IO()
then returns before it runs out of pages.

The caller is responsible for dirtying these pages, if needed.  We add
a should_dirty flag to the dio struct so that read pages are only
dirtied when we're operating on iovecs of user addresses.

Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Cc: Zach Brown <zab@zabbo.net>
---
 fs/direct-io.c     |   88 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fs.h |   26 ++++++++++++++++
 2 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 20bb84c..2fef85f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -126,6 +126,7 @@ struct dio {
 	spinlock_t bio_lock;		/* protects BIO fields below */
 	int page_errors;		/* errno from get_user_pages() */
 	int is_async;			/* is IO async ? */
+	int should_dirty;		/* should we mark read pages dirty? */
 	int io_error;			/* IO error in completion path */
 	unsigned long refcount;		/* direct_io_worker() and bios */
 	struct bio *bio_list;		/* singly linked via bi_private */
@@ -420,7 +421,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	dio->refcount++;
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
-	if (dio->is_async && dio->rw == READ)
+	if (dio->is_async && dio->rw == READ && dio->should_dirty)
 		bio_set_pages_dirty(bio);
 
 	if (sdio->submit_io)
@@ -491,13 +492,14 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 	if (!uptodate)
 		dio->io_error = -EIO;
 
-	if (dio->is_async && dio->rw == READ) {
+	if (dio->is_async && dio->rw == READ && dio->should_dirty) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
 		for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
 			struct page *page = bvec[page_no].bv_page;
 
-			if (dio->rw == READ && !PageCompound(page))
+			if (dio->rw == READ && !PageCompound(page) &&
+			    dio->should_dirty)
 				set_page_dirty_lock(page);
 			page_cache_release(page);
 		}
@@ -1336,6 +1338,8 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 				PAGE_SIZE - user_addr / PAGE_SIZE);
 	}
 
+	dio->should_dirty = 1;
+
 	for (seg = 0; seg < nr_segs; seg++) {
 		user_addr = (unsigned long)iov[seg].iov_base;
 		sdio.size += bytes = iov[seg].iov_len;
@@ -1400,6 +1404,84 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 
 EXPORT_SYMBOL(__blockdev_direct_IO);
 
+ssize_t
+__blockdev_direct_IO_bvec(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, struct bio_vec *bvec, loff_t offset,
+	unsigned long bvec_len, get_block_t get_block,
+	dio_iodone_t end_io, dio_submit_t submit_io, int flags)
+{
+	unsigned blkbits = inode->i_blkbits;
+	ssize_t retval = -EINVAL;
+	loff_t end = offset;
+	struct dio *dio;
+	struct dio_submit sdio = { 0, };
+	unsigned long i;
+	struct buffer_head map_bh = { 0, };
+
+	if (rw & WRITE)
+		rw = WRITE_ODIRECT;
+
+	if (!dio_aligned(offset, &blkbits, bdev))
+		goto out;
+
+	/* Check the memory alignment.  Blocks cannot straddle pages */
+	for (i = 0; i < bvec_len; i++) {
+		end += bvec[i].bv_len;
+		if (!dio_aligned(bvec[i].bv_len | bvec[i].bv_offset,
+				 &blkbits, bdev))
+			goto out;
+	}
+
+	dio = dio_alloc_init(flags, rw, iocb, inode, end_io, end);
+	retval = -ENOMEM;
+	if (!dio)
+		goto out;
+
+	retval = dio_lock_and_flush(dio, offset, end);
+	if (retval) {
+		kmem_cache_free(dio_cache, dio);
+		goto out;
+	}
+
+	sdio_init(&sdio, inode, offset, blkbits, get_block, submit_io);
+
+	sdio.pages_in_io = bvec_len;
+
+	for (i = 0; i < bvec_len; i++) {
+		sdio.size += bvec[i].bv_len;
+
+		/* Index into the first page of the first block */
+		sdio.first_block_in_page = bvec[i].bv_offset >> blkbits;
+		sdio.final_block_in_request = sdio.block_in_file +
+						(bvec[i].bv_len  >> blkbits);
+		/* Page fetching state */
+		sdio.curr_page = 0;
+		page_cache_get(bvec[i].bv_page);
+		dio->pages[0] = bvec[i].bv_page;
+		sdio.head = 0;
+		sdio.tail = 1;
+
+		sdio.total_pages = 1;
+		sdio.curr_user_address = 0;
+
+		retval = do_direct_IO(dio, &sdio, &map_bh);
+
+		dio->result += bvec[i].bv_len -
+			((sdio.final_block_in_request - sdio.block_in_file) <<
+					blkbits);
+
+		if (retval) {
+			dio_cleanup(dio, &sdio);
+			break;
+		}
+	}
+
+	retval = dio_post_submission(rw, offset, dio, &sdio, &map_bh, retval);
+out:
+	return retval;
+}
+EXPORT_SYMBOL(__blockdev_direct_IO_bvec);
+
 static __init int dio_init(void)
 {
 	dio_cache = KMEM_CACHE(dio, SLAB_PANIC);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4750933..94f2d0a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -692,6 +692,8 @@ struct address_space_operations {
 	void (*freepage)(struct page *);
 	ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs);
+	ssize_t (*direct_IO_bvec)(int, struct kiocb *, struct bio_vec *bvec,
+			loff_t offset, unsigned long bvec_len);
 	int (*get_xip_mem)(struct address_space *, pgoff_t, int,
 						void **, unsigned long *);
 	/*
@@ -2530,6 +2532,30 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 				    offset, nr_segs, get_block, NULL, NULL,
 				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
+
+ssize_t __blockdev_direct_IO_bvec(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev, struct bio_vec *bvec,
+	loff_t offset, unsigned long bvec_len, get_block_t get_block,
+	dio_iodone_t end_io, dio_submit_t submit_io, int flags);
+
+static inline ssize_t blockdev_direct_IO_bvec(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev, struct bio_vec *bvec,
+	loff_t offset, unsigned long bvec_len, get_block_t get_block,
+	dio_iodone_t end_io)
+{
+	return __blockdev_direct_IO_bvec(rw, iocb, inode, bdev, bvec, offset,
+				bvec_len, get_block, end_io, NULL,
+				DIO_LOCKING | DIO_SKIP_HOLES);
+}
+
+static inline ssize_t blockdev_direct_IO_bvec_no_locking(int rw,
+	struct kiocb *iocb, struct inode *inode, struct block_device *bdev,
+	struct bio_vec *bvec, loff_t offset, unsigned long bvec_len,
+	get_block_t get_block, dio_iodone_t end_io)
+{
+	return __blockdev_direct_IO_bvec(rw, iocb, inode, bdev, bvec, offset,
+				bvec_len, get_block, end_io, NULL, 0);
+}
 #else
 static inline void inode_dio_wait(struct inode *inode)
 {
-- 
1.7.9.2

  parent reply	other threads:[~2012-02-27 21:19 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-02-27 21:19 [RFC PATCH 00/22] loop: Issue O_DIRECT aio with pages Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 01/22] iov_iter: move into its own file Dave Kleikamp
2012-03-01 20:25   ` Jeff Moyer
2012-02-27 21:19 ` [RFC PATCH 02/22] iov_iter: add copy_to_user support Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 03/22] fuse: convert fuse to use iov_iter_copy_[to|from]_user Dave Kleikamp
     [not found]   ` <1330377576-3659-4-git-send-email-dave.kleikamp-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-02-28  9:09     ` Miklos Szeredi
2012-02-27 21:19 ` [RFC PATCH 04/22] iov_iter: hide iovec details behind ops function pointers Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 05/22] iov_iter: add bvec support Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 06/22] iov_iter: add a shorten call Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 07/22] iov_iter: let callers extract iovecs and bio_vecs Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 08/22] dio: create a dio_aligned() helper function Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 09/22] dio: add dio_alloc_init() " Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 10/22] dio: add sdio_init() " Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 11/22] dio: add dio_lock_and_flush() helper Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 12/22] dio: add dio_post_submission() helper function Dave Kleikamp
2012-02-27 21:19 ` Dave Kleikamp [this message]
2012-02-27 22:16   ` [RFC PATCH 13/22] dio: add __blockdev_direct_IO_bdev() Zach Brown
2012-02-27 21:19 ` [RFC PATCH 14/22] fs: pull iov_iter use higher up the stack Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 15/22] aio: add aio_kernel_() interface Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 16/22] aio: add aio support for iov_iter arguments Dave Kleikamp
2012-02-27 22:13   ` Zach Brown
2012-02-27 21:19 ` [RFC PATCH 17/22] bio: add bvec_length(), like iov_length() Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 18/22] ext3: add support for .read_iter and .write_iter Dave Kleikamp
2012-02-27 22:34   ` Zach Brown
2012-02-27 23:14     ` Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 19/22] ocfs2: add support for read_iter, write_iter, and direct_IO_bvec Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 20/22] ext4: " Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 21/22] btrfs: " Dave Kleikamp
2012-02-27 21:19 ` [RFC PATCH 22/22] nfs: add support for read_iter, write_iter Dave Kleikamp
     [not found]   ` <1330377576-3659-23-git-send-email-dave.kleikamp-QHcLZuEGTsvQT0dZR+AlfA@public.gmane.org>
2012-02-27 22:08     ` Myklebust, Trond
2012-02-27 23:17       ` Dave Kleikamp
2012-02-27 22:27 ` [RFC PATCH 00/22] loop: Issue O_DIRECT aio with pages Zach Brown
2012-02-27 22:53   ` Dave Kleikamp
2012-02-28  9:29 ` Christoph Hellwig
2012-02-28 15:14   ` Zach Brown
2012-02-29  9:08     ` Christoph Hellwig
2012-03-01 20:33       ` Jeff Moyer
2012-03-01 20:36         ` Dave Kleikamp

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1330377576-3659-14-git-send-email-dave.kleikamp@oracle.com \
    --to=dave.kleikamp@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=zab@zabbo.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).