From: Matthew Wilcox <matthew.r.wilcox@intel.com>
To: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org,
linux-kernel@vger.kernel.org
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Subject: [PATCH 1/6] Add bdev_read_page() and bdev_write_page()
Date: Thu, 9 Jan 2014 21:39:46 -0500 [thread overview]
Message-ID: <cover.1387748522.git.matthew.r.wilcox@intel.com> (raw)
In-Reply-To: <1389321591-25455-1-git-send-email-matthew.r.wilcox@intel.com>
A block device driver may choose to provide a rw_page operation.
It will be called when the filesystem is attempting to do page-sized
I/O to page cache pages (i.e. not for direct I/O). This does preclude
I/Os that are larger than page size, so this may only be a performance
gain for some devices.
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
fs/block_dev.c | 34 ++++++++++++++++++++++++++
fs/mpage.c | 66 ++++++++++++++++++++++++++++++++------------------
include/linux/blkdev.h | 4 +++
3 files changed, 80 insertions(+), 24 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e86823..660557c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -363,6 +363,40 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(blkdev_fsync);
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+ struct page *page)
+{
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+}
+EXPORT_SYMBOL_GPL(bdev_read_page);
+
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+ struct page *page, struct writeback_control *wbc)
+{
+ int result;
+ int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
+ if (result) {
+ if (PageSwapCache(page)) {
+ set_page_dirty(page);
+ ClearPageReclaim(page);
+ end_page_writeback(page);
+ } else {
+ redirty_page_for_writepage(wbc, page);
+ result = 0;
+ }
+ }
+ unlock_page(page);
+ return result;
+}
+EXPORT_SYMBOL_GPL(bdev_write_page);
+
/*
* pseudo-fs
*/
diff --git a/fs/mpage.c b/fs/mpage.c
index 0face1c..b8552ae 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -286,6 +286,11 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
alloc_new:
if (bio == NULL) {
+ if (first_hole == blocks_per_page) {
+ if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
+ page))
+ goto out;
+ }
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
GFP_KERNEL);
@@ -440,6 +445,35 @@ struct mpage_data {
unsigned use_writepage;
};
+/*
+ * We have our BIO, so we can now mark the buffers clean. Make
+ * sure to only clean buffers which we know we'll be writing.
+ */
+static void clean_buffers(struct page *page, unsigned first_unmapped)
+{
+ unsigned buffer_counter = 0;
+ struct buffer_head *bh, *head;
+ if (!page_has_buffers(page))
+ return;
+ head = page_buffers(page);
+ bh = head;
+
+ do {
+ if (buffer_counter++ == first_unmapped)
+ break;
+ clear_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
+
+ /*
+ * we cannot drop the bh if the page is not uptodate or a concurrent
+ * readpage would fail to serialize with the bh and it would read from
+ * disk before we reach the platter.
+ */
+ if (buffer_heads_over_limit && PageUptodate(page))
+ try_to_free_buffers(page);
+}
+
static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
void *data)
{
@@ -575,6 +609,13 @@ page_is_mapped:
alloc_new:
if (bio == NULL) {
+ if (first_unmapped == blocks_per_page) {
+ if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
+ page, wbc)) {
+ clean_buffers(page, first_unmapped);
+ goto out;
+ }
+ }
bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
if (bio == NULL)
@@ -592,30 +633,7 @@ alloc_new:
goto alloc_new;
}
- /*
- * OK, we have our BIO, so we can now mark the buffers clean. Make
- * sure to only clean buffers which we know we'll be writing.
- */
- if (page_has_buffers(page)) {
- struct buffer_head *head = page_buffers(page);
- struct buffer_head *bh = head;
- unsigned buffer_counter = 0;
-
- do {
- if (buffer_counter++ == first_unmapped)
- break;
- clear_buffer_dirty(bh);
- bh = bh->b_this_page;
- } while (bh != head);
-
- /*
- * we cannot drop the bh if the page is not uptodate
- * or a concurrent readpage would fail to serialize with the bh
- * and it would read from disk before we reach the platter.
- */
- if (buffer_heads_over_limit && PageUptodate(page))
- try_to_free_buffers(page);
- }
+ clean_buffers(page, first_unmapped);
BUG_ON(PageWriteback(page));
set_page_writeback(page);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1b135d4..50dc979 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1564,6 +1564,7 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
void (*release) (struct gendisk *, fmode_t);
+ int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
@@ -1582,6 +1583,9 @@ struct block_device_operations {
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
unsigned long);
+extern int bdev_read_page(struct block_device *, sector_t, struct page *);
+extern int bdev_write_page(struct block_device *, sector_t, struct page *,
+ struct writeback_control *);
#else /* CONFIG_BLOCK */
/*
* stubs for when the block layer is configured out
--
1.8.5.2
next prev parent reply other threads:[~2014-01-10 2:40 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-01-10 2:39 [PATCH 0/6] Page I/O Matthew Wilcox
2014-01-10 2:39 ` Matthew Wilcox [this message]
2014-01-10 2:39 ` [PATCH 2/6] Factor page_endio() out of mpage_end_io() Matthew Wilcox
2014-01-10 2:39 ` [PATCH 3/6] swap: Use bdev_read_page() / bdev_write_page() Matthew Wilcox
2014-01-10 2:39 ` [PATCH 4/6] brd: Add support for rw_page Matthew Wilcox
2014-01-10 2:39 ` [PATCH 5/6] virtio_blk: Add rw_page implementation Matthew Wilcox
2014-01-10 2:39 ` [PATCH 6/6] NVMe: Add support for rw_page Matthew Wilcox
2014-01-10 15:24 ` [PATCH 0/6] Page I/O Jeff Moyer
2014-01-10 16:17 ` Matthew Wilcox
2014-01-15 0:04 ` Dave Chinner
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=cover.1387748522.git.matthew.r.wilcox@intel.com \
--to=matthew.r.wilcox@intel.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).