linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Zach Brown <zach.brown@oracle.com>
To: linux-fsdevel@vger.kernel.org
Subject: [PATCH 6/8] dio: add an entry point which takes pages
Date: Thu, 22 Oct 2009 13:25:55 -0700	[thread overview]
Message-ID: <1256243157-16667-7-git-send-email-zach.brown@oracle.com> (raw)
In-Reply-To: <1256243157-16667-6-git-send-email-zach.brown@oracle.com>

This adds a high level entry point into the direct-io code which calls helpers
on memory specified by pages instead of iovecs.

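curr_user_address is used to decide whether the memory pages should be dirtied
on reads: it is non-zero only when the pages were mapped from user addresses.
The pages handed to this entry point are not user pages, so it is left at zero
and the pages are not dirtied.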

The trick here is to initialize the dio state so that do_direct_IO() consumes
the pages we provide and never tries to map user pages.  This is done by making
sure that final_block_in_request covers all the pages we provide.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
---
 fs/direct-io.c     |   86 +++++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/fs.h |    4 ++
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0a2ba8e..3551c4a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -109,7 +109,7 @@ struct dio {
 	 */
 	int curr_page;			/* changes */
 	int total_pages;		/* doesn't change */
-	unsigned long curr_user_address;/* changes */
+	unsigned long curr_user_address;/* changes, indicates user pages */
 
 	/*
 	 * Page queue.  These variables belong to dio_refill_pages() and
@@ -337,7 +337,7 @@ static void dio_bio_submit(struct dio *dio)
 	dio->refcount++;
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
-	if (dio->is_async && dio->rw == READ)
+	if (dio->is_async && dio->rw == READ && dio->curr_user_address)
 		bio_set_pages_dirty(bio);
 
 	submit_bio(dio->rw, bio);
@@ -403,13 +403,14 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 	if (!uptodate)
 		dio->io_error = -EIO;
 
-	if (dio->is_async && dio->rw == READ) {
+	if (dio->is_async && dio->rw == READ && dio->curr_user_address) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 	} else {
 		for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
 			struct page *page = bvec[page_no].bv_page;
 
-			if (dio->rw == READ && !PageCompound(page))
+			if (dio->rw == READ && !PageCompound(page) && 
+			    dio->curr_user_address)
 				set_page_dirty_lock(page);
 			page_cache_release(page);
 		}
@@ -1248,3 +1249,80 @@ out:
 	return ret;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);
+
+ssize_t
+__blockdev_direct_IO_pages(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, const struct bio_vec *bvec, 
+	unsigned long bvec_len, loff_t offset, get_block_t get_block,
+	dio_iodone_t end_io, int dio_lock_type)
+{
+	unsigned blkbits = inode->i_blkbits;
+	ssize_t ret;
+	loff_t end = offset;		/* becomes offset + total bytes below */
+	struct dio *dio;
+	unsigned long i;		/* index into bvec[0..bvec_len) */
+
+	if (dio_unaligned(offset, &blkbits, bdev)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Sum the segment lengths into 'end' and reject unaligned segments */
+	for (i = 0; i < bvec_len; i++) {
+		end += bvec[i].bv_len;
+		if (dio_unaligned(bvec[i].bv_len | bvec[i].bv_offset,
+				  &blkbits, bdev)) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	dio = dio_prepare(rw, iocb, inode, blkbits, offset, end, get_block,
+			  end_io, dio_lock_type);
+	if (!dio) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = dio_lock_and_flush(rw, inode, dio_lock_type, offset, end);
+	if (ret) {
+		kfree(dio);
+		goto out;
+	}
+
+	dio->pages_in_io = bvec_len;	/* each bio_vec supplies one page */
+
+	for (i = 0; i < bvec_len; i++) {
+		dio->size += bvec[i].bv_len;
+
+		/* Index of the first block within this segment's page */
+		dio->first_block_in_page = bvec[i].bv_offset >> blkbits;
+		dio->final_block_in_request = dio->block_in_file +
+						(bvec[i].bv_len  >> blkbits);
+		/* Page queue: preload our one page so no user pages are mapped */
+		dio->curr_page = 0;
+		page_cache_get(bvec[i].bv_page);	/* queue gets its own ref */
+		dio->pages[0] = bvec[i].bv_page;
+		dio->head = 0;
+		dio->tail = 1;
+
+		dio->total_pages = 1;
+		dio->curr_user_address = 0;	/* not user pages: never dirtied */
+	
+		ret = do_direct_IO(dio);
+
+		dio->result += bvec[i].bv_len -
+			((dio->final_block_in_request - dio->block_in_file) <<
+					blkbits);
+
+		if (ret) {
+			dio_cleanup(dio);
+			break;
+		}
+	}
+
+	ret = dio_post_submission(dio, offset, end, ret);
+out:
+	return ret;
+}
+EXPORT_SYMBOL(__blockdev_direct_IO_pages);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba15f0..01c0b71 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2266,6 +2266,10 @@ ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	int lock_type);
+ssize_t __blockdev_direct_IO_pages(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev,
+	const struct bio_vec *bvec, unsigned long bvec_len, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io, int dio_lock_type);
 
 enum {
 	DIO_LOCKING = 1, /* need locking between buffered and direct access */
-- 
1.6.2.5


  reply	other threads:[~2009-10-22 20:47 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-22 20:25 [RFC] loop: issue aio with pages Zach Brown
2009-10-22 20:25 ` [PATCH 1/8] gadgetfs: use schedule_work() instead of EIOCBRETRY Zach Brown
2009-10-22 20:25   ` [PATCH 2/8] aio: disable retry Zach Brown
2009-10-22 20:25     ` [PATCH 3/8] aio: add an interface to submit aio from the kernel Zach Brown
2009-10-22 20:25       ` [PATCH 4/8] aio: add aio_read_pages and aio_write_pages Zach Brown
2009-10-22 20:25         ` [PATCH 5/8] dio: refactor __blockdev_direct_IO() Zach Brown
2009-10-22 20:25           ` Zach Brown [this message]
2009-10-22 20:25             ` [PATCH 7/8] block: provide aio_read_pages and aio_write_pages Zach Brown
2009-10-22 20:25               ` [PATCH 8/8] loop: use aio to perform io on the underlying file Zach Brown
2009-10-27 16:01                 ` Jeff Moyer
2009-10-27 15:49             ` [PATCH 6/8] dio: add an entry point which takes pages Jeff Moyer
2009-10-27 17:50               ` Zach Brown
2009-10-27 15:39           ` [PATCH 5/8] dio: refactor __blockdev_direct_IO() Jeff Moyer
2009-10-26 16:17         ` [PATCH 4/8] aio: add aio_read_pages and aio_write_pages Jeff Moyer
2009-10-26 17:08           ` Jeff Moyer
2009-10-26 22:22             ` Zach Brown
2009-10-26 16:10       ` [PATCH 3/8] aio: add an interface to submit aio from the kernel Jeff Moyer
2009-10-26 22:21         ` Zach Brown
2009-10-25  7:37     ` [PATCH 2/8] aio: disable retry Christoph Hellwig
2009-10-26 22:15       ` Zach Brown
2009-10-26 16:00     ` Jeff Moyer
2009-10-26 15:57   ` [PATCH 1/8] gadgetfs: use schedule_work() instead of EIOCBRETRY Jeff Moyer
2009-10-25  7:36 ` [RFC] loop: issue aio with pages Christoph Hellwig
2009-10-26 22:13   ` Zach Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1256243157-16667-7-git-send-email-zach.brown@oracle.com \
    --to=zach.brown@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).