linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH, RFC] map multiple blocks at a time in mpage_readpage(s)
@ 2005-10-04 19:42 Christoph Hellwig
  2005-10-04 22:05 ` Dave Kleikamp
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Christoph Hellwig @ 2005-10-04 19:42 UTC (permalink / raw)
  To: linux-fsdevel

This patch changes mpage_readpage/mpage_readpages to use a get_blocks
call that gets the disk mapping information for multiple blocks at the
same time, similar to the way direct I/O code works.  For extent based
filesystems like jfs, xfs or reiser4 this allows to reduce the overhead
of the allocator calls, which is especially nice when that codepath
is rather heavyweight (as it is for example in xfs).  For filesystems
that don't have an allocator that make use of it nothing changes.

I've tested this heavily on XFS with block size = page size and 512 byte
blocks, and it passes the XFS QA regression test suite fine.  I've done
some basic fsx testing on all the other affected filesystems.


Index: linux-2.6/fs/ext3/inode.c
===================================================================
--- linux-2.6.orig/fs/ext3/inode.c	2005-10-04 19:51:00.000000000 +0200
+++ linux-2.6/fs/ext3/inode.c	2005-10-04 20:47:55.000000000 +0200
@@ -800,6 +800,18 @@
 	return ret;
 }
 
+static int
+ext3_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
+			struct buffer_head *bh_result, int create)
+{
+	int ret;
+
+	ret = ext3_get_block(inode, iblock, bh_result, create);
+	if (ret == 0)
+		bh_result->b_size = (1 << inode->i_blkbits);
+	return ret;
+}
+
 #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
 
 static int
@@ -1411,14 +1423,15 @@
 
 static int ext3_readpage(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, ext3_get_block);
+	return mpage_readpage(page, ext3_get_blocks, ext3_get_block);
 }
 
 static int
 ext3_readpages(struct file *file, struct address_space *mapping,
 		struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, ext3_get_block);
+	return mpage_readpages(mapping, pages, nr_pages,
+			ext3_get_blocks, ext3_get_block);
 }
 
 static int ext3_invalidatepage(struct page *page, unsigned long offset)
Index: linux-2.6/fs/mpage.c
===================================================================
--- linux-2.6.orig/fs/mpage.c	2005-10-04 19:51:00.000000000 +0200
+++ linux-2.6/fs/mpage.c	2005-10-04 20:48:54.000000000 +0200
@@ -165,7 +165,9 @@
 
 static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
-			sector_t *last_block_in_bio, get_block_t get_block)
+			sector_t *last_block_in_bio, struct buffer_head *map_bh,
+			unsigned long *first_logical_block, int *map_valid,
+			get_blocks_t get_blocks, get_block_t get_block)
 {
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
@@ -177,29 +179,63 @@
 	unsigned page_block;
 	unsigned first_hole = blocks_per_page;
 	struct block_device *bdev = NULL;
-	struct buffer_head bh;
 	int length;
 	int fully_mapped = 1;
+	unsigned nblocks, i;
 
 	if (page_has_buffers(page))
 		goto confused;
 
 	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
 	last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
+	page_block = 0;
+
+	/*
+	 * Map blocks using the result from the last get_blocks call first.
+	 */
+	nblocks = map_bh->b_size >> inode->i_blkbits;
+	if (*map_valid &&
+	    block_in_file > *first_logical_block &&
+	    block_in_file < (*first_logical_block + nblocks)) {
+		unsigned map_offset = block_in_file - *first_logical_block;
+		unsigned last = nblocks - map_offset;
+
+		for (i = 0; ; i++) {
+			if (i == last) {
+				*map_valid = 0;
+				break;
+			} else if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr + map_offset + i;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
+	}
+
+	/*
+	 * Then do more get_blocks calls until we are done with this page.
+	 */
+	map_bh->b_page = page;
+	while (page_block < blocks_per_page) {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
 
-	bh.b_page = page;
-	for (page_block = 0; page_block < blocks_per_page;
-				page_block++, block_in_file++) {
-		bh.b_state = 0;
 		if (block_in_file < last_block) {
-			if (get_block(inode, block_in_file, &bh, 0))
+			if (get_blocks(inode, block_in_file,
+				       last_block - block_in_file, map_bh, 0))
 				goto confused;
+			*first_logical_block = block_in_file;
+			*map_valid  = 1;
 		}
 
-		if (!buffer_mapped(&bh)) {
+		if (!buffer_mapped(map_bh)) {
 			fully_mapped = 0;
 			if (first_hole == blocks_per_page)
 				first_hole = page_block;
+			page_block++;
+			block_in_file++;
+			*map_valid = 0;
 			continue;
 		}
 
@@ -209,8 +245,8 @@
 		 * we just collected from get_block into the page's buffers
 		 * so readpage doesn't have to repeat the get_block call
 		 */
-		if (buffer_uptodate(&bh)) {
-			map_buffer_to_page(page, &bh, page_block);
+		if (buffer_uptodate(map_bh)) {
+			map_buffer_to_page(page, map_bh, page_block);
 			goto confused;
 		}
 	
@@ -218,10 +254,20 @@
 			goto confused;		/* hole -> non-hole */
 
 		/* Contiguous blocks? */
-		if (page_block && blocks[page_block-1] != bh.b_blocknr-1)
+		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
 			goto confused;
-		blocks[page_block] = bh.b_blocknr;
-		bdev = bh.b_bdev;
+		nblocks = map_bh->b_size >> inode->i_blkbits;
+		for (i = 0; ; i++) {
+			if (i == nblocks) {
+				*map_valid = 0;
+				break;
+			} else if (page_block == blocks_per_page)
+				break;
+			blocks[page_block] = map_bh->b_blocknr + i;
+			page_block++;
+			block_in_file++;
+		}
+		bdev = map_bh->b_bdev;
 	}
 
 	if (first_hole != blocks_per_page) {
@@ -260,7 +306,7 @@
 		goto alloc_new;
 	}
 
-	if (buffer_boundary(&bh) || (first_hole != blocks_per_page))
+	if (buffer_boundary(map_bh) || (first_hole != blocks_per_page))
 		bio = mpage_bio_submit(READ, bio);
 	else
 		*last_block_in_bio = blocks[blocks_per_page - 1];
@@ -325,12 +371,16 @@
  */
 int
 mpage_readpages(struct address_space *mapping, struct list_head *pages,
-				unsigned nr_pages, get_block_t get_block)
+				unsigned nr_pages, get_blocks_t get_blocks,
+				get_block_t get_block)
 {
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
 	struct pagevec lru_pvec;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
+	int map_valid = 0;
 
 	pagevec_init(&lru_pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -342,7 +392,9 @@
 					page->index, GFP_KERNEL)) {
 			bio = do_mpage_readpage(bio, page,
 					nr_pages - page_idx,
-					&last_block_in_bio, get_block);
+					&last_block_in_bio, &map_bh,
+					&first_logical_block, &map_valid,
+					get_blocks, get_block);
 			if (!pagevec_add(&lru_pvec, page))
 				__pagevec_lru_add(&lru_pvec);
 		} else {
@@ -360,13 +412,18 @@
 /*
  * This isn't called much at all
  */
-int mpage_readpage(struct page *page, get_block_t get_block)
+int mpage_readpage(struct page *page, get_blocks_t get_blocks,
+		get_block_t get_block)
 {
 	struct bio *bio = NULL;
 	sector_t last_block_in_bio = 0;
+	struct buffer_head map_bh;
+	unsigned long first_logical_block = 0;
+	int map_valid = 0;
 
-	bio = do_mpage_readpage(bio, page, 1,
-			&last_block_in_bio, get_block);
+	bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio,
+			&map_bh, &first_logical_block, &map_valid,
+			get_blocks, get_block);
 	if (bio)
 		mpage_bio_submit(READ, bio);
 	return 0;
Index: linux-2.6/fs/xfs/linux-2.6/xfs_aops.c
===================================================================
--- linux-2.6.orig/fs/xfs/linux-2.6/xfs_aops.c	2005-10-04 19:51:00.000000000 +0200
+++ linux-2.6/fs/xfs/linux-2.6/xfs_aops.c	2005-10-04 19:52:49.000000000 +0200
@@ -1034,6 +1034,18 @@
 }
 
 STATIC int
+linvfs_get_blocks(
+	struct inode		*inode,
+	sector_t		iblock,
+	unsigned long		max_blocks,
+	struct buffer_head	*bh_result,
+	int			create)
+{
+	return __linvfs_get_block(inode, iblock, max_blocks, bh_result,
+					create, 0, BMAPI_WRITE);
+}
+
+STATIC int
 linvfs_get_blocks_direct(
 	struct inode		*inode,
 	sector_t		iblock,
@@ -1139,7 +1151,7 @@
 	struct file		*unused,
 	struct page		*page)
 {
-	return mpage_readpage(page, linvfs_get_block);
+	return mpage_readpage(page, linvfs_get_blocks, linvfs_get_block);
 }
 
 STATIC int
@@ -1149,7 +1161,8 @@
 	struct list_head	*pages,
 	unsigned		nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block);
+	return mpage_readpages(mapping, pages, nr_pages, linvfs_get_blocks,
+			linvfs_get_block);
 }
 
 STATIC void
Index: linux-2.6/include/linux/mpage.h
===================================================================
--- linux-2.6.orig/include/linux/mpage.h	2005-10-04 19:51:00.000000000 +0200
+++ linux-2.6/include/linux/mpage.h	2005-10-04 19:52:49.000000000 +0200
@@ -14,8 +14,9 @@
 typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
 
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
-				unsigned nr_pages, get_block_t get_block);
-int mpage_readpage(struct page *page, get_block_t get_block);
+				unsigned nr_pages, get_blocks_t get_blocks,
+				get_block_t get_block);
+int mpage_readpage(struct page *page, get_blocks_t get_blocks, get_block_t get_block);
 int mpage_writepages(struct address_space *mapping,
 		struct writeback_control *wbc, get_block_t get_block);
 int mpage_writepage(struct page *page, get_block_t *get_block,
Index: linux-2.6/fs/ext2/inode.c
===================================================================
--- linux-2.6.orig/fs/ext2/inode.c	2005-10-04 20:47:05.000000000 +0200
+++ linux-2.6/fs/ext2/inode.c	2005-10-04 21:22:43.000000000 +0200
@@ -621,6 +621,18 @@
 	goto reread;
 }
 
+static int
+ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
+			struct buffer_head *bh_result, int create)
+{
+	int ret;
+
+	ret = ext2_get_block(inode, iblock, bh_result, create);
+	if (ret == 0)
+		bh_result->b_size = (1 << inode->i_blkbits);
+	return ret;
+}
+
 static int ext2_writepage(struct page *page, struct writeback_control *wbc)
 {
 	return block_write_full_page(page, ext2_get_block, wbc);
@@ -628,14 +640,15 @@
 
 static int ext2_readpage(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, ext2_get_block);
+	return mpage_readpage(page, ext2_get_blocks, ext2_get_block);
 }
 
 static int
 ext2_readpages(struct file *file, struct address_space *mapping,
 		struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, ext2_get_block);
+	return mpage_readpages(mapping, pages, nr_pages,
+			ext2_get_blocks, ext2_get_block);
 }
 
 static int
@@ -663,18 +676,6 @@
 	return generic_block_bmap(mapping,block,ext2_get_block);
 }
 
-static int
-ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks,
-			struct buffer_head *bh_result, int create)
-{
-	int ret;
-
-	ret = ext2_get_block(inode, iblock, bh_result, create);
-	if (ret == 0)
-		bh_result->b_size = (1 << inode->i_blkbits);
-	return ret;
-}
-
 static ssize_t
 ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
Index: linux-2.6/fs/jfs/inode.c
===================================================================
--- linux-2.6.orig/fs/jfs/inode.c	2005-10-04 20:47:05.000000000 +0200
+++ linux-2.6/fs/jfs/inode.c	2005-10-04 20:47:31.000000000 +0200
@@ -273,13 +273,14 @@
 
 static int jfs_readpage(struct file *file, struct page *page)
 {
-	return mpage_readpage(page, jfs_get_block);
+	return mpage_readpage(page, jfs_get_blocks, jfs_get_block);
 }
 
 static int jfs_readpages(struct file *file, struct address_space *mapping,
 		struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, jfs_get_block);
+	return mpage_readpages(mapping, pages, nr_pages, jfs_get_blocks,
+			jfs_get_block);
 }
 
 static int jfs_prepare_write(struct file *file,
Index: linux-2.6/fs/reiserfs/inode.c
===================================================================
--- linux-2.6.orig/fs/reiserfs/inode.c	2005-10-04 20:47:05.000000000 +0200
+++ linux-2.6/fs/reiserfs/inode.c	2005-10-04 20:47:31.000000000 +0200
@@ -1049,10 +1049,24 @@
 }
 
 static int
+reiserfs_get_blocks(struct inode *inode, sector_t iblock,
+		unsigned long max_blocks, struct buffer_head *bh_result,
+		int create)
+{
+	int ret;
+
+	ret = reiserfs_get_block(inode, iblock, bh_result, create);
+	if (ret == 0)
+		bh_result->b_size = (1 << inode->i_blkbits);
+	return ret;
+}
+
+static int
 reiserfs_readpages(struct file *file, struct address_space *mapping,
 		   struct list_head *pages, unsigned nr_pages)
 {
-	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block);
+	return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_blocks,
+			reiserfs_get_block);
 }
 
 /* Compute real number of used bytes by file

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2005-10-21 14:17 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-10-04 19:42 [PATCH, RFC] map multiple blocks at a time in mpage_readpage(s) Christoph Hellwig
2005-10-04 22:05 ` Dave Kleikamp
2005-10-04 23:15 ` Badari Pulavarty
2005-10-05  7:14   ` Christoph Hellwig
2005-10-07 22:13     ` Badari Pulavarty
2005-10-05  6:04 ` Nikita Danilov
2005-10-05  7:10   ` Christoph Hellwig
2005-10-05 16:11     ` Suparna Bhattacharya
2005-10-19 11:56 ` Christoph Hellwig
2005-10-19 15:07   ` Dave Kleikamp
2005-10-20 22:06   ` Andrew Morton
2005-10-21  6:18     ` Suparna Bhattacharya
2005-10-21 12:53     ` Dave Kleikamp
2005-10-21 14:16       ` Jörn Engel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).