From mboxrd@z Thu Jan 1 00:00:00 1970 From: Christoph Hellwig Subject: Re: [PATCH, RFC] map multiple blocks at a time in mpage_readpage(s) Date: Wed, 19 Oct 2005 13:56:39 +0200 Message-ID: <20051019115639.GA13589@lst.de> References: <20051004194246.GA18432@lst.de> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Cc: linux-fsdevel@vger.kernel.org Return-path: Received: from verein.lst.de ([213.95.11.210]:7553 "EHLO mail.lst.de") by vger.kernel.org with ESMTP id S1750826AbVJSL4y (ORCPT ); Wed, 19 Oct 2005 07:56:54 -0400 To: akpm@osdl.org Content-Disposition: inline In-Reply-To: <20051004194246.GA18432@lst.de> Sender: linux-fsdevel-owner@vger.kernel.org List-Id: linux-fsdevel.vger.kernel.org On Tue, Oct 04, 2005 at 09:42:46PM +0200, Christoph Hellwig wrote: > This patch changes mpage_readpage/mpage_readpages to use a get_blocks > call that gets the disk mapping information for multiple blocks at the > same time, similar to the way direct I/O code works. For extent based > filesystems like jfs, xfs or reiser4 this allows to reduce the overhead > of the allocator calls, which is especially nice when that codepath > is rather heavyweight (as it is for example in xfs). For filesystems > that don't have an allocator that make use of it nothing changes. > > I've tested this heavily on XFS with block size = page size and 512 byte > blocks, and it passes the XFS QA regression test suite fine. I've done > some basic fsx testing on all the other affected filesystems. I think the patch should go into -mm now that no one has raised serious objections. 
Signed-off-by: Christoph Hellwig Index: linux-2.6/fs/ext3/inode.c =================================================================== --- linux-2.6.orig/fs/ext3/inode.c 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/fs/ext3/inode.c 2005-10-04 20:47:55.000000000 +0200 @@ -800,6 +800,18 @@ return ret; } +static int +ext3_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + int ret; + + ret = ext3_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) static int @@ -1411,14 +1423,15 @@ static int ext3_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, ext3_get_block); + return mpage_readpage(page, ext3_get_blocks, ext3_get_block); } static int ext3_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); + return mpage_readpages(mapping, pages, nr_pages, + ext3_get_blocks, ext3_get_block); } static int ext3_invalidatepage(struct page *page, unsigned long offset) Index: linux-2.6/fs/mpage.c =================================================================== --- linux-2.6.orig/fs/mpage.c 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/fs/mpage.c 2005-10-04 20:48:54.000000000 +0200 @@ -165,7 +165,9 @@ static struct bio * do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, - sector_t *last_block_in_bio, get_block_t get_block) + sector_t *last_block_in_bio, struct buffer_head *map_bh, + unsigned long *first_logical_block, int *map_valid, + get_blocks_t get_blocks, get_block_t get_block) { struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -177,29 +179,63 @@ unsigned page_block; unsigned first_hole = blocks_per_page; struct block_device *bdev = NULL; - struct buffer_head bh; 
int length; int fully_mapped = 1; + unsigned nblocks, i; if (page_has_buffers(page)) goto confused; block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; + page_block = 0; + + /* + * Map blocks using the result from the last get_blocks call first. + */ + nblocks = map_bh->b_size >> inode->i_blkbits; + if (*map_valid && + block_in_file > *first_logical_block && + block_in_file < (*first_logical_block + nblocks)) { + unsigned map_offset = block_in_file - *first_logical_block; + unsigned last = nblocks - map_offset; + + for (i = 0; ; i++) { + if (i == last) { + *map_valid = 0; + break; + } else if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr + map_offset + i; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; + } + + /* + * Then do more get_blocks calls until we are done with this page. + */ + map_bh->b_page = page; + while (page_block < blocks_per_page) { + map_bh->b_state = 0; + map_bh->b_size = 0; - bh.b_page = page; - for (page_block = 0; page_block < blocks_per_page; - page_block++, block_in_file++) { - bh.b_state = 0; if (block_in_file < last_block) { - if (get_block(inode, block_in_file, &bh, 0)) + if (get_blocks(inode, block_in_file, + last_block - block_in_file, map_bh, 0)) goto confused; + *first_logical_block = block_in_file; + *map_valid = 1; } - if (!buffer_mapped(&bh)) { + if (!buffer_mapped(map_bh)) { fully_mapped = 0; if (first_hole == blocks_per_page) first_hole = page_block; + page_block++; + block_in_file++; + *map_valid = 0; continue; } @@ -209,8 +245,8 @@ * we just collected from get_block into the page's buffers * so readpage doesn't have to repeat the get_block call */ - if (buffer_uptodate(&bh)) { - map_buffer_to_page(page, &bh, page_block); + if (buffer_uptodate(map_bh)) { + map_buffer_to_page(page, map_bh, page_block); goto confused; } @@ -218,10 +254,20 @@ goto confused; /* hole -> non-hole */ /* Contiguous blocks? 
*/ - if (page_block && blocks[page_block-1] != bh.b_blocknr-1) + if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1) goto confused; - blocks[page_block] = bh.b_blocknr; - bdev = bh.b_bdev; + nblocks = map_bh->b_size >> inode->i_blkbits; + for (i = 0; ; i++) { + if (i == nblocks) { + *map_valid = 0; + break; + } else if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr + i; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; } if (first_hole != blocks_per_page) { @@ -260,7 +306,7 @@ goto alloc_new; } - if (buffer_boundary(&bh) || (first_hole != blocks_per_page)) + if (buffer_boundary(map_bh) || (first_hole != blocks_per_page)) bio = mpage_bio_submit(READ, bio); else *last_block_in_bio = blocks[blocks_per_page - 1]; @@ -325,12 +371,16 @@ */ int mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block) + unsigned nr_pages, get_blocks_t get_blocks, + get_block_t get_block) { struct bio *bio = NULL; unsigned page_idx; sector_t last_block_in_bio = 0; struct pagevec lru_pvec; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; + int map_valid = 0; pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -342,7 +392,9 @@ page->index, GFP_KERNEL)) { bio = do_mpage_readpage(bio, page, nr_pages - page_idx, - &last_block_in_bio, get_block); + &last_block_in_bio, &map_bh, + &first_logical_block, &map_valid, + get_blocks, get_block); if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); } else { @@ -360,13 +412,18 @@ /* * This isn't called much at all */ -int mpage_readpage(struct page *page, get_block_t get_block) +int mpage_readpage(struct page *page, get_blocks_t get_blocks, + get_block_t get_block) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; + int map_valid = 0; - bio = do_mpage_readpage(bio, page, 1, - &last_block_in_bio, 
get_block); + bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, + &map_bh, &first_logical_block, &map_valid, + get_blocks, get_block); if (bio) mpage_bio_submit(READ, bio); return 0; Index: linux-2.6/fs/xfs/linux-2.6/xfs_aops.c =================================================================== --- linux-2.6.orig/fs/xfs/linux-2.6/xfs_aops.c 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/fs/xfs/linux-2.6/xfs_aops.c 2005-10-04 19:52:49.000000000 +0200 @@ -1034,6 +1034,18 @@ } STATIC int +linvfs_get_blocks( + struct inode *inode, + sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, + int create) +{ + return __linvfs_get_block(inode, iblock, max_blocks, bh_result, + create, 0, BMAPI_WRITE); +} + +STATIC int linvfs_get_blocks_direct( struct inode *inode, sector_t iblock, @@ -1139,7 +1151,7 @@ struct file *unused, struct page *page) { - return mpage_readpage(page, linvfs_get_block); + return mpage_readpage(page, linvfs_get_blocks, linvfs_get_block); } STATIC int @@ -1149,7 +1161,8 @@ struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, linvfs_get_blocks, + linvfs_get_block); } STATIC void Index: linux-2.6/include/linux/mpage.h =================================================================== --- linux-2.6.orig/include/linux/mpage.h 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/include/linux/mpage.h 2005-10-04 19:52:49.000000000 +0200 @@ -14,8 +14,9 @@ typedef int (writepage_t)(struct page *page, struct writeback_control *wbc); int mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block); -int mpage_readpage(struct page *page, get_block_t get_block); + unsigned nr_pages, get_blocks_t get_blocks, + get_block_t get_block); +int mpage_readpage(struct page *page, get_blocks_t get_blocks, get_block_t get_block); int mpage_writepages(struct address_space *mapping, 
struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, Index: linux-2.6/fs/ext2/inode.c =================================================================== --- linux-2.6.orig/fs/ext2/inode.c 2005-10-04 20:47:05.000000000 +0200 +++ linux-2.6/fs/ext2/inode.c 2005-10-04 21:22:43.000000000 +0200 @@ -621,6 +621,18 @@ goto reread; } +static int +ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + int ret; + + ret = ext2_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + static int ext2_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, ext2_get_block, wbc); @@ -628,14 +640,15 @@ static int ext2_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, ext2_get_block); + return mpage_readpage(page, ext2_get_blocks, ext2_get_block); } static int ext2_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); + return mpage_readpages(mapping, pages, nr_pages, + ext2_get_blocks, ext2_get_block); } static int @@ -663,18 +676,6 @@ return generic_block_bmap(mapping,block,ext2_get_block); } -static int -ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) -{ - int ret; - - ret = ext2_get_block(inode, iblock, bh_result, create); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -} - static ssize_t ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) Index: linux-2.6/fs/jfs/inode.c =================================================================== --- linux-2.6.orig/fs/jfs/inode.c 2005-10-04 20:47:05.000000000 +0200 +++ 
linux-2.6/fs/jfs/inode.c 2005-10-04 20:47:31.000000000 +0200 @@ -273,13 +273,14 @@ static int jfs_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, jfs_get_block); + return mpage_readpage(page, jfs_get_blocks, jfs_get_block); } static int jfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, jfs_get_blocks, + jfs_get_block); } static int jfs_prepare_write(struct file *file, Index: linux-2.6/fs/reiserfs/inode.c =================================================================== --- linux-2.6.orig/fs/reiserfs/inode.c 2005-10-04 20:47:05.000000000 +0200 +++ linux-2.6/fs/reiserfs/inode.c 2005-10-04 20:47:31.000000000 +0200 @@ -1049,10 +1049,24 @@ } static int +reiserfs_get_blocks(struct inode *inode, sector_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create) +{ + int ret; + + ret = reiserfs_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + +static int reiserfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_blocks, + reiserfs_get_block); } /* Compute real number of used bytes by file