From mboxrd@z Thu Jan 1 00:00:00 1970 From: Christoph Hellwig Subject: [PATCH, RFC] map multiple blocks at a time in mpage_readpage(s) Date: Tue, 4 Oct 2005 21:42:46 +0200 Message-ID: <20051004194246.GA18432@lst.de> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Return-path: Received: from verein.lst.de ([213.95.11.210]:39841 "EHLO mail.lst.de") by vger.kernel.org with ESMTP id S964933AbVJDTms (ORCPT ); Tue, 4 Oct 2005 15:42:48 -0400 Received: from verein.lst.de (localhost [127.0.0.1]) by mail.lst.de (8.12.3/8.12.3/Debian-7.1) with ESMTP id j94Jgk6t018539 (version=TLSv1/SSLv3 cipher=EDH-RSA-DES-CBC3-SHA bits=168 verify=NO) for ; Tue, 4 Oct 2005 21:42:46 +0200 Received: (from hch@localhost) by verein.lst.de (8.12.3/8.12.3/Debian-6.6) id j94JgkoR018537 for linux-fsdevel@vger.kernel.org; Tue, 4 Oct 2005 21:42:46 +0200 To: linux-fsdevel@vger.kernel.org Content-Disposition: inline Sender: linux-fsdevel-owner@vger.kernel.org List-Id: linux-fsdevel.vger.kernel.org This patch changes mpage_readpage/mpage_readpages to use a get_blocks call that gets the disk mapping information for multiple blocks at the same time, similar to the way the direct I/O code works. For extent-based filesystems like jfs, xfs or reiser4 this reduces the overhead of the allocator calls, which is especially nice when that codepath is rather heavyweight (as it is, for example, in xfs). For filesystems whose allocator cannot make use of it, nothing changes. I've tested this heavily on XFS with block size = page size and 512 byte blocks, and it passes the XFS QA regression test suite fine. I've done some basic fsx testing on all the other affected filesystems. 
Index: linux-2.6/fs/ext3/inode.c =================================================================== --- linux-2.6.orig/fs/ext3/inode.c 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/fs/ext3/inode.c 2005-10-04 20:47:55.000000000 +0200 @@ -800,6 +800,18 @@ return ret; } +static int +ext3_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + int ret; + + ret = ext3_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + #define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) static int @@ -1411,14 +1423,15 @@ static int ext3_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, ext3_get_block); + return mpage_readpage(page, ext3_get_blocks, ext3_get_block); } static int ext3_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); + return mpage_readpages(mapping, pages, nr_pages, + ext3_get_blocks, ext3_get_block); } static int ext3_invalidatepage(struct page *page, unsigned long offset) Index: linux-2.6/fs/mpage.c =================================================================== --- linux-2.6.orig/fs/mpage.c 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/fs/mpage.c 2005-10-04 20:48:54.000000000 +0200 @@ -165,7 +165,9 @@ static struct bio * do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages, - sector_t *last_block_in_bio, get_block_t get_block) + sector_t *last_block_in_bio, struct buffer_head *map_bh, + unsigned long *first_logical_block, int *map_valid, + get_blocks_t get_blocks, get_block_t get_block) { struct inode *inode = page->mapping->host; const unsigned blkbits = inode->i_blkbits; @@ -177,29 +179,63 @@ unsigned page_block; unsigned first_hole = blocks_per_page; struct block_device *bdev = NULL; - struct buffer_head bh; int length; int fully_mapped = 1; 
+ unsigned nblocks, i; if (page_has_buffers(page)) goto confused; block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits); last_block = (i_size_read(inode) + blocksize - 1) >> blkbits; + page_block = 0; + + /* + * Map blocks using the result from the last get_blocks call first. + */ + nblocks = map_bh->b_size >> inode->i_blkbits; + if (*map_valid && + block_in_file > *first_logical_block && + block_in_file < (*first_logical_block + nblocks)) { + unsigned map_offset = block_in_file - *first_logical_block; + unsigned last = nblocks - map_offset; + + for (i = 0; ; i++) { + if (i == last) { + *map_valid = 0; + break; + } else if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr + map_offset + i; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; + } + + /* + * Then do more get_blocks calls until we are done with this page. + */ + map_bh->b_page = page; + while (page_block < blocks_per_page) { + map_bh->b_state = 0; + map_bh->b_size = 0; - bh.b_page = page; - for (page_block = 0; page_block < blocks_per_page; - page_block++, block_in_file++) { - bh.b_state = 0; if (block_in_file < last_block) { - if (get_block(inode, block_in_file, &bh, 0)) + if (get_blocks(inode, block_in_file, + last_block - block_in_file, map_bh, 0)) goto confused; + *first_logical_block = block_in_file; + *map_valid = 1; } - if (!buffer_mapped(&bh)) { + if (!buffer_mapped(map_bh)) { fully_mapped = 0; if (first_hole == blocks_per_page) first_hole = page_block; + page_block++; + block_in_file++; + *map_valid = 0; continue; } @@ -209,8 +245,8 @@ * we just collected from get_block into the page's buffers * so readpage doesn't have to repeat the get_block call */ - if (buffer_uptodate(&bh)) { - map_buffer_to_page(page, &bh, page_block); + if (buffer_uptodate(map_bh)) { + map_buffer_to_page(page, map_bh, page_block); goto confused; } @@ -218,10 +254,20 @@ goto confused; /* hole -> non-hole */ /* Contiguous blocks? 
*/ - if (page_block && blocks[page_block-1] != bh.b_blocknr-1) + if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1) goto confused; - blocks[page_block] = bh.b_blocknr; - bdev = bh.b_bdev; + nblocks = map_bh->b_size >> inode->i_blkbits; + for (i = 0; ; i++) { + if (i == nblocks) { + *map_valid = 0; + break; + } else if (page_block == blocks_per_page) + break; + blocks[page_block] = map_bh->b_blocknr + i; + page_block++; + block_in_file++; + } + bdev = map_bh->b_bdev; } if (first_hole != blocks_per_page) { @@ -260,7 +306,7 @@ goto alloc_new; } - if (buffer_boundary(&bh) || (first_hole != blocks_per_page)) + if (buffer_boundary(map_bh) || (first_hole != blocks_per_page)) bio = mpage_bio_submit(READ, bio); else *last_block_in_bio = blocks[blocks_per_page - 1]; @@ -325,12 +371,16 @@ */ int mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block) + unsigned nr_pages, get_blocks_t get_blocks, + get_block_t get_block) { struct bio *bio = NULL; unsigned page_idx; sector_t last_block_in_bio = 0; struct pagevec lru_pvec; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; + int map_valid = 0; pagevec_init(&lru_pvec, 0); for (page_idx = 0; page_idx < nr_pages; page_idx++) { @@ -342,7 +392,9 @@ page->index, GFP_KERNEL)) { bio = do_mpage_readpage(bio, page, nr_pages - page_idx, - &last_block_in_bio, get_block); + &last_block_in_bio, &map_bh, + &first_logical_block, &map_valid, + get_blocks, get_block); if (!pagevec_add(&lru_pvec, page)) __pagevec_lru_add(&lru_pvec); } else { @@ -360,13 +412,18 @@ /* * This isn't called much at all */ -int mpage_readpage(struct page *page, get_block_t get_block) +int mpage_readpage(struct page *page, get_blocks_t get_blocks, + get_block_t get_block) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; + struct buffer_head map_bh; + unsigned long first_logical_block = 0; + int map_valid = 0; - bio = do_mpage_readpage(bio, page, 1, - &last_block_in_bio, 
get_block); + bio = do_mpage_readpage(bio, page, 1, &last_block_in_bio, + &map_bh, &first_logical_block, &map_valid, + get_blocks, get_block); if (bio) mpage_bio_submit(READ, bio); return 0; Index: linux-2.6/fs/xfs/linux-2.6/xfs_aops.c =================================================================== --- linux-2.6.orig/fs/xfs/linux-2.6/xfs_aops.c 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/fs/xfs/linux-2.6/xfs_aops.c 2005-10-04 19:52:49.000000000 +0200 @@ -1034,6 +1034,18 @@ } STATIC int +linvfs_get_blocks( + struct inode *inode, + sector_t iblock, + unsigned long max_blocks, + struct buffer_head *bh_result, + int create) +{ + return __linvfs_get_block(inode, iblock, max_blocks, bh_result, + create, 0, BMAPI_WRITE); +} + +STATIC int linvfs_get_blocks_direct( struct inode *inode, sector_t iblock, @@ -1139,7 +1151,7 @@ struct file *unused, struct page *page) { - return mpage_readpage(page, linvfs_get_block); + return mpage_readpage(page, linvfs_get_blocks, linvfs_get_block); } STATIC int @@ -1149,7 +1161,8 @@ struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, linvfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, linvfs_get_blocks, + linvfs_get_block); } STATIC void Index: linux-2.6/include/linux/mpage.h =================================================================== --- linux-2.6.orig/include/linux/mpage.h 2005-10-04 19:51:00.000000000 +0200 +++ linux-2.6/include/linux/mpage.h 2005-10-04 19:52:49.000000000 +0200 @@ -14,8 +14,9 @@ typedef int (writepage_t)(struct page *page, struct writeback_control *wbc); int mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block); -int mpage_readpage(struct page *page, get_block_t get_block); + unsigned nr_pages, get_blocks_t get_blocks, + get_block_t get_block); +int mpage_readpage(struct page *page, get_blocks_t get_blocks, get_block_t get_block); int mpage_writepages(struct address_space *mapping, 
struct writeback_control *wbc, get_block_t get_block); int mpage_writepage(struct page *page, get_block_t *get_block, Index: linux-2.6/fs/ext2/inode.c =================================================================== --- linux-2.6.orig/fs/ext2/inode.c 2005-10-04 20:47:05.000000000 +0200 +++ linux-2.6/fs/ext2/inode.c 2005-10-04 21:22:43.000000000 +0200 @@ -621,6 +621,18 @@ goto reread; } +static int +ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, + struct buffer_head *bh_result, int create) +{ + int ret; + + ret = ext2_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + static int ext2_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page, ext2_get_block, wbc); @@ -628,14 +640,15 @@ static int ext2_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, ext2_get_block); + return mpage_readpage(page, ext2_get_blocks, ext2_get_block); } static int ext2_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); + return mpage_readpages(mapping, pages, nr_pages, + ext2_get_blocks, ext2_get_block); } static int @@ -663,18 +676,6 @@ return generic_block_bmap(mapping,block,ext2_get_block); } -static int -ext2_get_blocks(struct inode *inode, sector_t iblock, unsigned long max_blocks, - struct buffer_head *bh_result, int create) -{ - int ret; - - ret = ext2_get_block(inode, iblock, bh_result, create); - if (ret == 0) - bh_result->b_size = (1 << inode->i_blkbits); - return ret; -} - static ssize_t ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) Index: linux-2.6/fs/jfs/inode.c =================================================================== --- linux-2.6.orig/fs/jfs/inode.c 2005-10-04 20:47:05.000000000 +0200 +++ 
linux-2.6/fs/jfs/inode.c 2005-10-04 20:47:31.000000000 +0200 @@ -273,13 +273,14 @@ static int jfs_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, jfs_get_block); + return mpage_readpage(page, jfs_get_blocks, jfs_get_block); } static int jfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, jfs_get_blocks, + jfs_get_block); } static int jfs_prepare_write(struct file *file, Index: linux-2.6/fs/reiserfs/inode.c =================================================================== --- linux-2.6.orig/fs/reiserfs/inode.c 2005-10-04 20:47:05.000000000 +0200 +++ linux-2.6/fs/reiserfs/inode.c 2005-10-04 20:47:31.000000000 +0200 @@ -1049,10 +1049,24 @@ } static int +reiserfs_get_blocks(struct inode *inode, sector_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create) +{ + int ret; + + ret = reiserfs_get_block(inode, iblock, bh_result, create); + if (ret == 0) + bh_result->b_size = (1 << inode->i_blkbits); + return ret; +} + +static int reiserfs_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); + return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_blocks, + reiserfs_get_block); } /* Compute real number of used bytes by file