All of lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Wilcox <matthew.r.wilcox@intel.com>
To: linux-fsdevel@vger.kernel.org, linux-mm@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: Matthew Wilcox <matthew.r.wilcox@intel.com>
Subject: [PATCH 1/6] Add bdev_read_page() and bdev_write_page()
Date: Thu,  9 Jan 2014 21:39:46 -0500	[thread overview]
Message-ID: <cover.1387748522.git.matthew.r.wilcox@intel.com> (raw)
In-Reply-To: <1389321591-25455-1-git-send-email-matthew.r.wilcox@intel.com>

A block device driver may choose to provide an rw_page operation.
It will be called when the filesystem is attempting to do page-sized
I/O to page cache pages (i.e. not for direct I/O).  This does preclude
I/Os that are larger than page size, so this may only be a performance
gain for some devices.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
---
 fs/block_dev.c         | 34 ++++++++++++++++++++++++++
 fs/mpage.c             | 66 ++++++++++++++++++++++++++++++++------------------
 include/linux/blkdev.h |  4 +++
 3 files changed, 80 insertions(+), 24 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 1e86823..660557c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -363,6 +363,40 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(blkdev_fsync);
 
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+			struct page *page)
+{
+	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	if (!ops->rw_page)
+		return -EOPNOTSUPP;
+	return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+}
+EXPORT_SYMBOL_GPL(bdev_read_page);
+
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+			struct page *page, struct writeback_control *wbc)
+{
+	int result;
+	int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+	const struct block_device_operations *ops = bdev->bd_disk->fops;
+	if (!ops->rw_page)
+		return -EOPNOTSUPP;
+	result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
+	if (result) {
+		if (PageSwapCache(page)) {
+			set_page_dirty(page);
+			ClearPageReclaim(page);
+			end_page_writeback(page);
+		} else {
+			redirty_page_for_writepage(wbc, page);
+			result = 0;
+		}
+	}
+	unlock_page(page);
+	return result;
+}
+EXPORT_SYMBOL_GPL(bdev_write_page);
+
 /*
  * pseudo-fs
  */
diff --git a/fs/mpage.c b/fs/mpage.c
index 0face1c..b8552ae 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -286,6 +286,11 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 
 alloc_new:
 	if (bio == NULL) {
+		if (first_hole == blocks_per_page) {
+			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
+								page))
+				goto out;
+		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 			  	min_t(int, nr_pages, bio_get_nr_vecs(bdev)),
 				GFP_KERNEL);
@@ -440,6 +445,35 @@ struct mpage_data {
 	unsigned use_writepage;
 };
 
+/*
+ * We have our BIO, so we can now mark the buffers clean.  Make
+ * sure to only clean buffers which we know we'll be writing.
+ */
+static void clean_buffers(struct page *page, unsigned first_unmapped)
+{
+	unsigned buffer_counter = 0;
+	struct buffer_head *bh, *head;
+	if (!page_has_buffers(page))
+		return;
+	head = page_buffers(page);
+	bh = head;
+
+	do {
+		if (buffer_counter++ == first_unmapped)
+			break;
+		clear_buffer_dirty(bh);
+		bh = bh->b_this_page;
+	} while (bh != head);
+
+	/*
+	 * we cannot drop the bh if the page is not uptodate or a concurrent
+	 * readpage would fail to serialize with the bh and it would read from
+	 * disk before we reach the platter.
+	 */
+	if (buffer_heads_over_limit && PageUptodate(page))
+		try_to_free_buffers(page);
+}
+
 static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 		      void *data)
 {
@@ -575,6 +609,13 @@ page_is_mapped:
 
 alloc_new:
 	if (bio == NULL) {
+		if (first_unmapped == blocks_per_page) {
+			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
+								page, wbc)) {
+				clean_buffers(page, first_unmapped);
+				goto out;
+			}
+		}
 		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
 				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
@@ -592,30 +633,7 @@ alloc_new:
 		goto alloc_new;
 	}
 
-	/*
-	 * OK, we have our BIO, so we can now mark the buffers clean.  Make
-	 * sure to only clean buffers which we know we'll be writing.
-	 */
-	if (page_has_buffers(page)) {
-		struct buffer_head *head = page_buffers(page);
-		struct buffer_head *bh = head;
-		unsigned buffer_counter = 0;
-
-		do {
-			if (buffer_counter++ == first_unmapped)
-				break;
-			clear_buffer_dirty(bh);
-			bh = bh->b_this_page;
-		} while (bh != head);
-
-		/*
-		 * we cannot drop the bh if the page is not uptodate
-		 * or a concurrent readpage would fail to serialize with the bh
-		 * and it would read from disk before we reach the platter.
-		 */
-		if (buffer_heads_over_limit && PageUptodate(page))
-			try_to_free_buffers(page);
-	}
+	clean_buffers(page, first_unmapped);
 
 	BUG_ON(PageWriteback(page));
 	set_page_writeback(page);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1b135d4..50dc979 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1564,6 +1564,7 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
 struct block_device_operations {
 	int (*open) (struct block_device *, fmode_t);
 	void (*release) (struct gendisk *, fmode_t);
+	int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
 	int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
 	int (*direct_access) (struct block_device *, sector_t,
@@ -1582,6 +1583,9 @@ struct block_device_operations {
 
 extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
 				 unsigned long);
+extern int bdev_read_page(struct block_device *, sector_t, struct page *);
+extern int bdev_write_page(struct block_device *, sector_t, struct page *,
+						struct writeback_control *);
 #else /* CONFIG_BLOCK */
 /*
  * stubs for when the block layer is configured out
-- 
1.8.5.2


  reply	other threads:[~2014-01-10  2:40 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-01-10  2:39 [PATCH 0/6] Page I/O Matthew Wilcox
2014-01-10  2:39 ` Matthew Wilcox [this message]
2014-01-10  2:39   ` [PATCH 2/6] Factor page_endio() out of mpage_end_io() Matthew Wilcox
2014-01-10  2:39   ` [PATCH 3/6] swap: Use bdev_read_page() / bdev_write_page() Matthew Wilcox
2014-01-10  2:39   ` [PATCH 4/6] brd: Add support for rw_page Matthew Wilcox
2014-01-10  2:39   ` [PATCH 5/6] virtio_blk: Add rw_page implementation Matthew Wilcox
2014-01-10  2:39   ` [PATCH 6/6] NVMe: Add support for rw_page Matthew Wilcox
2014-01-10 15:24 ` [PATCH 0/6] Page I/O Jeff Moyer
2014-01-10 16:17   ` Matthew Wilcox
2014-01-15  0:04   ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cover.1387748522.git.matthew.r.wilcox@intel.com \
    --to=matthew.r.wilcox@intel.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.