All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mingming Cao <cmm@us.ibm.com>
To: ext2-devel <ext2-devel@lists.sourceforge.net>,
	Andrew Morton <akpm@osdl.org>,
	"Stephen C. Tweedie" <sct@redhat.com>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	linux-fsdevel@vger.kernel.org
Cc: Badari Pulavarty <pbadari@us.ibm.com>, suparna@in.ibm.com, tytso@mit.edu
Subject: [RFC] [PATCH 3/4]generic getblocks() support in mpage_writepages
Date: Sun, 17 Jul 2005 10:40:55 -0700	[thread overview]
Message-ID: <1121622055.4609.26.camel@localhost.localdomain> (raw)
In-Reply-To: <1110839154.24286.302.camel@dyn318077bld.beaverton.ibm.com>

Updated patch from Suparna adding generic support for clustering pages
together in mpage_writepages() to make use of getblocks().

---

 linux-2.6.12-ming/fs/buffer.c                 |   49 -----
 linux-2.6.12-ming/fs/ext2/inode.c             |   15 -
 linux-2.6.12-ming/fs/ext3/inode.c             |   15 +
 linux-2.6.12-ming/fs/ext3/super.c             |    3 
 linux-2.6.12-ming/fs/hfs/inode.c              |    2 
 linux-2.6.12-ming/fs/hfsplus/inode.c          |    2 
 linux-2.6.12-ming/fs/jfs/inode.c              |   24 ++
 linux-2.6.12-ming/fs/mpage.c                  |  214 ++++++++++++++++++--------
 linux-2.6.12-ming/include/linux/buffer_head.h |    4 
 linux-2.6.12-ming/include/linux/fs.h          |    2 
 linux-2.6.12-ming/include/linux/mpage.h       |   11 -
 linux-2.6.12-ming/include/linux/pagemap.h     |    3 
 linux-2.6.12-ming/include/linux/pagevec.h     |    3 
 linux-2.6.12-ming/include/linux/radix-tree.h  |   14 +
 linux-2.6.12-ming/lib/radix-tree.c            |   25 ++-
 linux-2.6.12-ming/mm/filemap.c                |    9 -
 linux-2.6.12-ming/mm/swap.c                   |   11 +
 17 files changed, 270 insertions(+), 136 deletions(-)

diff -puN fs/buffer.c~mpage_writepages_getblocks fs/buffer.c
--- linux-2.6.12/fs/buffer.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/buffer.c	2005-07-15 00:11:01.000000000 -0700
@@ -2509,53 +2509,10 @@ EXPORT_SYMBOL(nobh_commit_write);
  * that it tries to operate without attaching bufferheads to
  * the page.
  */
-int nobh_writepage(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc)
+int nobh_writepage(struct page *page, get_blocks_t *get_blocks,
+		struct writeback_control *wbc, writepage_t bh_writepage_fn)
 {
-	struct inode * const inode = page->mapping->host;
-	loff_t i_size = i_size_read(inode);
-	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
-	unsigned offset;
-	void *kaddr;
-	int ret;
-
-	/* Is the page fully inside i_size? */
-	if (page->index < end_index)
-		goto out;
-
-	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
-	if (page->index >= end_index+1 || !offset) {
-		/*
-		 * The page may have dirty, unmapped buffers.  For example,
-		 * they may have been added in ext3_writepage().  Make them
-		 * freeable here, so the page does not leak.
-		 */
-#if 0
-		/* Not really sure about this  - do we need this ? */
-		if (page->mapping->a_ops->invalidatepage)
-			page->mapping->a_ops->invalidatepage(page, offset);
-#endif
-		unlock_page(page);
-		return 0; /* don't care */
-	}
-
-	/*
-	 * The page straddles i_size.  It must be zeroed out on each and every
-	 * writepage invocation because it may be mmapped.  "A file is mapped
-	 * in multiples of the page size.  For a file that is not a multiple of
-	 * the  page size, the remaining memory is zeroed when mapped, and
-	 * writes to that region are not written out to the file."
-	 */
-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
-out:
-	ret = mpage_writepage(page, get_block, wbc);
-	if (ret == -EAGAIN)
-		ret = __block_write_full_page(inode, page, get_block, wbc);
-	return ret;
+	return mpage_writepage(page, get_blocks, wbc, bh_writepage_fn);
 }
 EXPORT_SYMBOL(nobh_writepage);
 
diff -puN fs/ext2/inode.c~mpage_writepages_getblocks fs/ext2/inode.c
--- linux-2.6.12/fs/ext2/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/ext2/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -650,12 +650,6 @@ ext2_nobh_prepare_write(struct file *fil
 	return nobh_prepare_write(page,from,to,ext2_get_block);
 }
 
-static int ext2_nobh_writepage(struct page *page,
-			struct writeback_control *wbc)
-{
-	return nobh_writepage(page, ext2_get_block, wbc);
-}
-
 static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,ext2_get_block);
@@ -673,6 +667,12 @@ ext2_get_blocks(struct inode *inode, sec
 	return ret;
 }
 
+static int ext2_nobh_writepage(struct page *page,
+			struct writeback_control *wbc)
+{
+	return nobh_writepage(page, ext2_get_blocks, wbc, ext2_writepage);
+}
+
 static ssize_t
 ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
@@ -687,7 +687,8 @@ ext2_direct_IO(int rw, struct kiocb *ioc
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, ext2_get_block);
+        return __mpage_writepages(mapping, wbc, ext2_get_blocks,
+					ext2_writepage);
 }
 
 struct address_space_operations ext2_aops = {
diff -puN fs/ext3/super.c~mpage_writepages_getblocks fs/ext3/super.c
--- linux-2.6.12/fs/ext3/super.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/ext3/super.c	2005-07-15 00:11:01.000000000 -0700
@@ -1353,6 +1353,7 @@ static int ext3_fill_super (struct super
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
 
 	set_opt(sbi->s_mount_opt, RESERVATION);
+	set_opt(sbi->s_mount_opt, NOBH); /* temp: set nobh default */
 
 	if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
 		goto failed_mount;
@@ -1599,6 +1600,7 @@ static int ext3_fill_super (struct super
 			printk(KERN_ERR "EXT3-fs: Journal does not support "
 			       "requested data journaling mode\n");
 			goto failed_mount3;
+		set_opt(sbi->s_mount_opt, NOBH); /* temp: set nobh default */
 		}
 	default:
 		break;
@@ -1616,6 +1618,7 @@ static int ext3_fill_super (struct super
 				"its supported only with writeback mode\n");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
+		printk("NOBH option set\n");
 	}
 	if (test_opt(sb, DELAYED_ALLOC)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
diff -puN fs/ext3/inode.c~mpage_writepages_getblocks fs/ext3/inode.c
--- linux-2.6.12/fs/ext3/inode.c~mpage_writepages_getblocks	2005-07-15 17:32:05.865000480 -0700
+++ linux-2.6.12-ming/fs/ext3/inode.c	2005-07-15 18:06:49.384257408 -0700
@@ -1195,6 +1195,11 @@ get_block:
 }
 
 
+static int ext3_writepages_get_blocks(struct inode *inode, sector_t iblock,
+		unsigned long max_blocks, struct buffer_head *bh, int create)
+{
+	return ext3_direct_io_get_blocks(inode, iblock, max_blocks, bh, create);
+}
 /*
  * `handle' can be NULL if create is zero
  */
@@ -1674,6 +1679,13 @@ out_fail:
 	return ret;
 }
 
+static int
+ext3_writeback_writepage_helper(struct page *page,
+				struct writeback_control *wbc)
+{
+	return block_write_full_page(page, ext3_get_block, wbc);
+}
+
 static int ext3_writeback_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
@@ -1692,7 +1704,8 @@ static int ext3_writeback_writepage(stru
 	}
 
 	if (test_opt(inode->i_sb, NOBH))
-		ret = nobh_writepage(page, ext3_get_block, wbc);
+		ret = nobh_writepage(page, ext3_writepages_get_blocks, wbc,
+			ext3_writeback_writepage_helper);
 	else
 		ret = block_write_full_page(page, ext3_get_block, wbc);
 
diff -puN fs/hfs/inode.c~mpage_writepages_getblocks fs/hfs/inode.c
--- linux-2.6.12/fs/hfs/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/hfs/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -124,7 +124,7 @@ static ssize_t hfs_direct_IO(int rw, str
 static int hfs_writepages(struct address_space *mapping,
 			  struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, hfs_get_block);
+	return mpage_writepages(mapping, wbc, hfs_get_blocks);
 }
 
 struct address_space_operations hfs_btree_aops = {
diff -puN fs/hfsplus/inode.c~mpage_writepages_getblocks fs/hfsplus/inode.c
--- linux-2.6.12/fs/hfsplus/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/hfsplus/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -121,7 +121,7 @@ static ssize_t hfsplus_direct_IO(int rw,
 static int hfsplus_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, hfsplus_get_block);
+	return mpage_writepages(mapping, wbc, hfsplus_get_blocks);
 }
 
 struct address_space_operations hfsplus_btree_aops = {
diff -puN fs/jfs/inode.c~mpage_writepages_getblocks fs/jfs/inode.c
--- linux-2.6.12/fs/jfs/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/jfs/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -249,21 +249,41 @@ jfs_get_blocks(struct inode *ip, sector_
 	return rc;
 }
 
+static int
+jfs_mpage_get_blocks(struct inode *ip, sector_t lblock, unsigned long
+			max_blocks, struct buffer_head *bh_result, int create)
+{
+	/*
+	 * fixme: temporary workaround: return one block at a time until
+	 * we figure out why we see exposures with truncate on
+	 * allocating multiple blocks in one shot.
+	 */
+	return jfs_get_blocks(ip, lblock, 1, bh_result, create);
+}
+
 static int jfs_get_block(struct inode *ip, sector_t lblock,
 			 struct buffer_head *bh_result, int create)
 {
 	return jfs_get_blocks(ip, lblock, 1, bh_result, create);
 }
 
+static int jfs_bh_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	return block_write_full_page(page, jfs_get_block, wbc);
+}
+
+
 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	return nobh_writepage(page, jfs_get_block, wbc);
+	return nobh_writepage(page, jfs_mpage_get_blocks, wbc, jfs_bh_writepage);
 }
 
 static int jfs_writepages(struct address_space *mapping,
 			struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, jfs_get_block);
+        return __mpage_writepages(mapping, wbc, jfs_mpage_get_blocks,
+					jfs_bh_writepage);
 }
 
 static int jfs_readpage(struct file *file, struct page *page)
diff -puN fs/mpage.c~mpage_writepages_getblocks fs/mpage.c
--- linux-2.6.12/fs/mpage.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/mpage.c	2005-07-15 18:06:49.397255432 -0700
@@ -373,6 +373,67 @@ int mpage_readpage(struct page *page, ge
 }
 EXPORT_SYMBOL(mpage_readpage);
 
+struct mpageio {
+	struct bio *bio;
+	struct buffer_head map_bh;
+	unsigned long block_in_file;
+	unsigned long final_block_in_request;
+	sector_t block_in_bio;
+	int boundary;
+	sector_t boundary_block;
+	struct block_device *boundary_bdev;
+};
+
+/*
+ * Maps as many contiguous disk blocks as it can within the range of
+ * the request, and returns the total number of contiguous mapped
+ * blocks in the mpageio.
+ */
+static unsigned long mpage_get_more_blocks(struct mpageio *mio,
+	struct inode *inode, get_blocks_t get_blocks)
+{
+	struct buffer_head map_bh = {.b_state = 0};
+	unsigned long mio_nblocks = mio->map_bh.b_size >> inode->i_blkbits;
+	unsigned long first_unmapped = mio->block_in_file + mio_nblocks;
+	unsigned long next_contig_block = mio->map_bh.b_blocknr + mio_nblocks;
+
+	while ((first_unmapped < mio->final_block_in_request) &&
+		(mio->map_bh.b_size < PAGE_SIZE)) {
+
+		if (get_blocks(inode, first_unmapped,
+			mio->final_block_in_request - first_unmapped,
+			&map_bh, 1))
+			break;
+		if (mio_nblocks && ((map_bh.b_blocknr != next_contig_block) ||
+			map_bh.b_bdev != mio->map_bh.b_bdev))
+			break;
+
+		if (buffer_new(&map_bh)) {
+			int i = 0;
+			for (; i < map_bh.b_size >> inode->i_blkbits; i++)
+				unmap_underlying_metadata(map_bh.b_bdev,
+					map_bh.b_blocknr + i);
+		}
+
+		if (buffer_boundary(&map_bh)) {
+			mio->boundary = 1;
+			mio->boundary_block = map_bh.b_blocknr;
+			mio->boundary_bdev = map_bh.b_bdev;
+		}
+		if (mio_nblocks == 0) {
+			mio->map_bh.b_bdev = map_bh.b_bdev;
+			mio->map_bh.b_blocknr = map_bh.b_blocknr;
+		}
+
+		mio_nblocks += map_bh.b_size >> inode->i_blkbits;
+		first_unmapped = mio->block_in_file + mio_nblocks;
+		next_contig_block = mio->map_bh.b_blocknr + mio_nblocks;
+		mio->map_bh.b_size += map_bh.b_size;
+	}
+
+	return mio_nblocks;
+}
+
 /*
  * Writing is not so simple.
  *
@@ -389,9 +450,9 @@ EXPORT_SYMBOL(mpage_readpage);
  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
  * just allocate full-size (16-page) BIOs.
  */
-static struct bio *
-__mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
-	sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc,
+static int
+__mpage_writepage(struct mpageio *mio, struct page *page,
+	get_blocks_t get_blocks, struct writeback_control *wbc,
 	writepage_t writepage_fn)
 {
 	struct address_space *mapping = page->mapping;
@@ -399,9 +460,8 @@ __mpage_writepage(struct bio *bio, struc
 	const unsigned blkbits = inode->i_blkbits;
 	unsigned long end_index;
 	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
-	sector_t last_block;
+	sector_t last_block, blocks_to_skip;
 	sector_t block_in_file;
-	sector_t blocks[MAX_BUF_PER_PAGE];
 	unsigned page_block;
 	unsigned first_unmapped = blocks_per_page;
 	struct block_device *bdev = NULL;
@@ -409,8 +469,10 @@ __mpage_writepage(struct bio *bio, struc
 	sector_t boundary_block = 0;
 	struct block_device *boundary_bdev = NULL;
 	int length;
-	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
+	struct buffer_head *map_bh = &mio->map_bh;
+	struct bio *bio = mio->bio;
+	int ret = 0;
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -438,10 +500,13 @@ __mpage_writepage(struct bio *bio, struc
 			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
 				goto confused;
 			if (page_block) {
-				if (bh->b_blocknr != blocks[page_block-1] + 1)
+				if (bh->b_blocknr != map_bh->b_blocknr
+					+ page_block)
 					goto confused;
+			} else {
+				map_bh->b_blocknr = bh->b_blocknr;
+				map_bh->b_size = PAGE_SIZE;
 			}
-			blocks[page_block++] = bh->b_blocknr;
 			boundary = buffer_boundary(bh);
 			if (boundary) {
 				boundary_block = bh->b_blocknr;
@@ -468,33 +533,30 @@ __mpage_writepage(struct bio *bio, struc
 	BUG_ON(!PageUptodate(page));
 	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
 	last_block = (i_size - 1) >> blkbits;
-	map_bh.b_page = page;
-	for (page_block = 0; page_block < blocks_per_page; ) {
-
-		map_bh.b_state = 0;
-		if (get_block(inode, block_in_file, &map_bh, 1))
-			goto confused;
-		if (buffer_new(&map_bh))
-			unmap_underlying_metadata(map_bh.b_bdev,
-						map_bh.b_blocknr);
-		if (buffer_boundary(&map_bh)) {
-			boundary_block = map_bh.b_blocknr;
-			boundary_bdev = map_bh.b_bdev;
-		}
-		if (page_block) {
-			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
-				goto confused;
-		}
-		blocks[page_block++] = map_bh.b_blocknr;
-		boundary = buffer_boundary(&map_bh);
-		bdev = map_bh.b_bdev;
-		if (block_in_file == last_block)
-			break;
-		block_in_file++;
+	blocks_to_skip = block_in_file - mio->block_in_file;
+	mio->block_in_file = block_in_file;
+	if (blocks_to_skip < (map_bh->b_size >> blkbits)) {
+		map_bh->b_blocknr += blocks_to_skip;
+		map_bh->b_size -= blocks_to_skip << blkbits;
+	} else {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
+		if (mio->final_block_in_request > last_block)
+			mio->final_block_in_request = last_block;
+		mpage_get_more_blocks(mio, inode, get_blocks);
 	}
-	BUG_ON(page_block == 0);
+	if (map_bh->b_size < PAGE_SIZE)
+		goto confused;
 
-	first_unmapped = page_block;
+	if (mio->boundary && (mio->boundary_block < map_bh->b_blocknr
+		+ blocks_per_page)) {
+		boundary = 1;
+		boundary_block = mio->boundary_block;
+		boundary_bdev = mio->boundary_bdev;
+	}
+
+	bdev = map_bh->b_bdev;
+	first_unmapped = blocks_per_page;
 
 page_is_mapped:
 	end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -521,12 +583,16 @@ page_is_mapped:
 	/*
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
-	if (bio && *last_block_in_bio != blocks[0] - 1)
+	if (bio && mio->block_in_bio != map_bh->b_blocknr - 1)
 		bio = mpage_bio_submit(WRITE, bio);
 
 alloc_new:
 	if (bio == NULL) {
-		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+		/*
+		 * Fixme: bio size can be limited to final_block - block, or
+		 * even mio->map_bh.b_size
+		 */
+		bio = mpage_alloc(bdev, map_bh->b_blocknr << (blkbits - 9),
 				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
 			goto confused;
@@ -542,6 +608,9 @@ alloc_new:
 		bio = mpage_bio_submit(WRITE, bio);
 		goto alloc_new;
 	}
+	map_bh->b_blocknr += blocks_per_page;
+	map_bh->b_size -= PAGE_SIZE;
+	mio->block_in_file += blocks_per_page;
 
 	/*
 	 * OK, we have our BIO, so we can now mark the buffers clean.  Make
@@ -578,7 +647,8 @@ alloc_new:
 					boundary_block, 1 << blkbits);
 		}
 	} else {
-		*last_block_in_bio = blocks[blocks_per_page - 1];
+		/* we can pack more pages into the bio, don't submit yet */
+		mio->block_in_bio = map_bh->b_blocknr - 1;
 	}
 	goto out;
 
@@ -587,22 +657,23 @@ confused:
 		bio = mpage_bio_submit(WRITE, bio);
 
 	if (writepage_fn) {
-		*ret = (*writepage_fn)(page, wbc);
+		ret = (*writepage_fn)(page, wbc);
 	} else {
-		*ret = -EAGAIN;
+		ret = -EAGAIN;
 		goto out;
 	}
 	/*
 	 * The caller has a ref on the inode, so *mapping is stable
 	 */
-	if (*ret) {
-		if (*ret == -ENOSPC)
+	if (ret) {
+		if (ret == -ENOSPC)
 			set_bit(AS_ENOSPC, &mapping->flags);
 		else
 			set_bit(AS_EIO, &mapping->flags);
 	}
 out:
-	return bio;
+	mio->bio = bio;
+	return ret;
 }
 
 /**
@@ -628,11 +699,21 @@ out:
  */
 int
 mpage_writepages(struct address_space *mapping,
-		struct writeback_control *wbc, get_block_t get_block)
+		struct writeback_control *wbc, get_blocks_t get_blocks)
+{
+	return __mpage_writepages(mapping, wbc, get_blocks,
+                mapping->a_ops->writepage);
+
+}
+int
+__mpage_writepages(struct address_space *mapping,
+                struct writeback_control *wbc, get_blocks_t get_blocks,
+                writepage_t writepage_fn)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct bio *bio = NULL;
-	sector_t last_block_in_bio = 0;
+	struct inode *inode = mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
 	int ret = 0;
 	int done = 0;
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -642,6 +723,9 @@ mpage_writepages(struct address_space *m
 	pgoff_t end = -1;		/* Inclusive */
 	int scanned = 0;
 	int is_range = 0;
+	struct mpageio mio = {
+		.bio = NULL
+	};
 
 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
 		wbc->encountered_congestion = 1;
@@ -649,7 +733,7 @@ mpage_writepages(struct address_space *m
 	}
 
 	writepage = NULL;
-	if (get_block == NULL)
+	if (get_blocks == NULL)
 		writepage = mapping->a_ops->writepage;
 
 	pagevec_init(&pvec, 0);
@@ -666,12 +750,15 @@ mpage_writepages(struct address_space *m
 		scanned = 1;
 	}
 retry:
+	down_read(&inode->i_alloc_sem);
 	while (!done && (index <= end) &&
-			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_DIRTY,
+			(nr_pages = pagevec_contig_lookup_tag(&pvec, mapping,
+			&index, PAGECACHE_TAG_DIRTY,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
 		unsigned i;
 
+		mio.final_block_in_request = min(index, end) <<
+			(PAGE_CACHE_SHIFT - blkbits);
 		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -696,7 +783,7 @@ retry:
 				unlock_page(page);
 				continue;
 			}
-
+
 			if (wbc->sync_mode != WB_SYNC_NONE)
 				wait_on_page_writeback(page);
 
@@ -717,9 +804,9 @@ retry:
 							&mapping->flags);
 				}
 			} else {
-				bio = __mpage_writepage(bio, page, get_block,
-						&last_block_in_bio, &ret, wbc,
-						page->mapping->a_ops->writepage);
+				ret = __mpage_writepage(&mio, page, get_blocks,
+						wbc, writepage_fn);
+				bio = mio.bio;
 			}
 			if (unlikely(ret == WRITEPAGE_ACTIVATE))
 				unlock_page(page);
@@ -733,6 +820,9 @@ retry:
 		pagevec_release(&pvec);
 		cond_resched();
 	}
+
+	up_read(&inode->i_alloc_sem);
+
 	if (!scanned && !done) {
 		/*
 		 * We hit the last page and there is more work to be done: wrap
@@ -749,18 +839,24 @@ retry:
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
+EXPORT_SYMBOL(__mpage_writepages);
 
-int mpage_writepage(struct page *page, get_block_t get_block,
-	struct writeback_control *wbc)
+int mpage_writepage(struct page *page, get_blocks_t get_blocks,
+		struct writeback_control *wbc, writepage_t writepage_fn)
 {
 	int ret = 0;
-	struct bio *bio;
-	sector_t last_block_in_bio = 0;
-
-	bio = __mpage_writepage(NULL, page, get_block,
-			&last_block_in_bio, &ret, wbc, NULL);
-	if (bio)
-		mpage_bio_submit(WRITE, bio);
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
+	struct mpageio mio = {
+		.final_block_in_request = (page->index + 1) << (PAGE_CACHE_SHIFT
+			- blkbits)
+	};
+
+	ret = __mpage_writepage(&mio, page, get_blocks,
+			wbc, writepage_fn);
+	if (mio.bio)
+		mpage_bio_submit(WRITE, mio.bio);
 
 	return ret;
 }
diff -puN include/linux/buffer_head.h~mpage_writepages_getblocks include/linux/buffer_head.h
--- linux-2.6.12/include/linux/buffer_head.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/buffer_head.h	2005-07-15 00:11:01.000000000 -0700
@@ -206,8 +206,8 @@ int file_fsync(struct file *, struct den
 int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
 int nobh_truncate_page(struct address_space *, loff_t);
-int nobh_writepage(struct page *page, get_block_t *get_block,
-                        struct writeback_control *wbc);
+int nobh_writepage(struct page *page, get_blocks_t *get_blocks,
+	struct writeback_control *wbc, writepage_t bh_writepage);
 
 
 /*
diff -puN include/linux/fs.h~mpage_writepages_getblocks include/linux/fs.h
--- linux-2.6.12/include/linux/fs.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/fs.h	2005-07-15 00:11:01.000000000 -0700
@@ -305,6 +305,8 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
+
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
diff -puN include/linux/mpage.h~mpage_writepages_getblocks include/linux/mpage.h
--- linux-2.6.12/include/linux/mpage.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/mpage.h	2005-07-15 18:06:49.398255280 -0700
@@ -11,15 +11,18 @@
  */
 
 struct writeback_control;
-typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
 
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
 				unsigned nr_pages, get_block_t get_block);
 int mpage_readpage(struct page *page, get_block_t get_block);
+
 int mpage_writepages(struct address_space *mapping,
-		struct writeback_control *wbc, get_block_t get_block);
-int mpage_writepage(struct page *page, get_block_t *get_block,
-		struct writeback_control *wbc);
+                struct writeback_control *wbc, get_blocks_t get_blocks);
+int mpage_writepage(struct page *page, get_blocks_t *get_blocks,
+                struct writeback_control *wbc, writepage_t writepage);
+int __mpage_writepages(struct address_space *mapping,
+                struct writeback_control *wbc, get_blocks_t get_blocks,
+                writepage_t writepage);
 
 static inline int
 generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
diff -puN include/linux/pagemap.h~mpage_writepages_getblocks include/linux/pagemap.h
--- linux-2.6.12/include/linux/pagemap.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/pagemap.h	2005-07-15 00:11:01.000000000 -0700
@@ -73,7 +73,8 @@ extern struct page * find_or_create_page
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages);
+			int tag, unsigned int nr_pages, struct page **pages,
+			int contig);
 
 /*
  * Returns locked page at given index in given cache, creating it if needed.
diff -puN include/linux/pagevec.h~mpage_writepages_getblocks include/linux/pagevec.h
--- linux-2.6.12/include/linux/pagevec.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/pagevec.h	2005-07-15 00:11:01.000000000 -0700
@@ -28,6 +28,9 @@ unsigned pagevec_lookup(struct pagevec *
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
 		struct address_space *mapping, pgoff_t *index, int tag,
 		unsigned nr_pages);
+unsigned pagevec_contig_lookup_tag(struct pagevec *pvec,
+		struct address_space *mapping, pgoff_t *index, int tag,
+		unsigned nr_pages);
 
 static inline void pagevec_init(struct pagevec *pvec, int cold)
 {
diff -puN include/linux/radix-tree.h~mpage_writepages_getblocks include/linux/radix-tree.h
--- linux-2.6.12/include/linux/radix-tree.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/radix-tree.h	2005-07-15 00:11:01.000000000 -0700
@@ -59,8 +59,18 @@ void *radix_tree_tag_clear(struct radix_
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, int tag);
 unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
-		unsigned long first_index, unsigned int max_items, int tag);
+__radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+		unsigned long first_index, unsigned int max_items, int tag,
+		int contig);
+
+static inline unsigned int radix_tree_gang_lookup_tag(struct radix_tree_root
+		*root, void **results, unsigned long first_index,
+		unsigned int max_items, int tag)
+{
+	return __radix_tree_gang_lookup_tag(root, results, first_index,
+		max_items, tag, 0);
+}
+
 int radix_tree_tagged(struct radix_tree_root *root, int tag);
 
 static inline void radix_tree_preload_end(void)
diff -puN lib/radix-tree.c~mpage_writepages_getblocks lib/radix-tree.c
--- linux-2.6.12/lib/radix-tree.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/lib/radix-tree.c	2005-07-15 00:11:01.000000000 -0700
@@ -557,12 +557,13 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  */
 static unsigned int
 __lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
-	unsigned int max_items, unsigned long *next_index, int tag)
+	unsigned int max_items, unsigned long *next_index, int tag, int contig)
 {
 	unsigned int nr_found = 0;
 	unsigned int shift;
 	unsigned int height = root->height;
 	struct radix_tree_node *slot;
+	unsigned long cindex = (contig && (*next_index)) ? *next_index : -1;
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	slot = root->rnode;
@@ -575,6 +576,11 @@ __lookup_tag(struct radix_tree_root *roo
 				BUG_ON(slot->slots[i] == NULL);
 				break;
 			}
+			if (contig && index >= cindex) {
+				/* break in contiguity */
+				index = 0;
+				goto out;
+			}
 			index &= ~((1UL << shift) - 1);
 			index += 1UL << shift;
 			if (index == 0)
@@ -593,6 +599,10 @@ __lookup_tag(struct radix_tree_root *roo
 					results[nr_found++] = slot->slots[j];
 					if (nr_found == max_items)
 						goto out;
+				} else if (contig && nr_found) {
+					/* break in contiguity */
+					index = 0;
+					goto out;
 				}
 			}
 		}
@@ -618,29 +628,32 @@ out:
  *	returns the number of items which were placed at *@results.
  */
 unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
-		unsigned long first_index, unsigned int max_items, int tag)
+__radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+		unsigned long first_index, unsigned int max_items, int tag,
+		int contig)
 {
 	const unsigned long max_index = radix_tree_maxindex(root->height);
 	unsigned long cur_index = first_index;
+	unsigned long next_index = 0;	/* Index of next contiguous search */
 	unsigned int ret = 0;
 
 	while (ret < max_items) {
 		unsigned int nr_found;
-		unsigned long next_index;	/* Index of next search */
 
 		if (cur_index > max_index)
 			break;
 		nr_found = __lookup_tag(root, results + ret, cur_index,
-					max_items - ret, &next_index, tag);
+				max_items - ret, &next_index, tag, contig);
 		ret += nr_found;
 		if (next_index == 0)
 			break;
 		cur_index = next_index;
+		if (!nr_found)
+			next_index = 0;
 	}
 	return ret;
 }
-EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
+EXPORT_SYMBOL(__radix_tree_gang_lookup_tag);
 
 /**
  *	radix_tree_delete    -    delete an item from a radix tree
diff -puN mm/filemap.c~mpage_writepages_getblocks mm/filemap.c
--- linux-2.6.12/mm/filemap.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/mm/filemap.c	2005-07-15 00:11:01.000000000 -0700
@@ -649,16 +649,19 @@ unsigned find_get_pages(struct address_s
 /*
  * Like find_get_pages, except we only return pages which are tagged with
  * `tag'.   We update *index to index the next page for the traversal.
+ * If 'contig' is 1, then we return only pages which are contiguous in the
+ * file.
  */
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
+			int tag, unsigned int nr_pages, struct page **pages,
+			int contig)
 {
 	unsigned int i;
 	unsigned int ret;
 
 	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
-				(void **)pages, *index, nr_pages, tag);
+	ret = __radix_tree_gang_lookup_tag(&mapping->page_tree,
+			(void **)pages, *index, nr_pages, tag, contig);
 	for (i = 0; i < ret; i++)
 		page_cache_get(pages[i]);
 	if (ret)
diff -puN mm/swap.c~mpage_writepages_getblocks mm/swap.c
--- linux-2.6.12/mm/swap.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/mm/swap.c	2005-07-15 00:11:01.000000000 -0700
@@ -384,7 +384,16 @@ unsigned pagevec_lookup_tag(struct pagev
 		pgoff_t *index, int tag, unsigned nr_pages)
 {
 	pvec->nr = find_get_pages_tag(mapping, index, tag,
-					nr_pages, pvec->pages);
+					nr_pages, pvec->pages, 0);
+	return pagevec_count(pvec);
+}
+
+unsigned int
+pagevec_contig_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
+		pgoff_t *index, int tag, unsigned nr_pages)
+{
+	pvec->nr = find_get_pages_tag(mapping, index, tag,
+					nr_pages, pvec->pages, 1);
 	return pagevec_count(pvec);
 }
 

_



WARNING: multiple messages have this Message-ID (diff)
From: Mingming Cao <cmm@us.ibm.com>
To: ext2-devel <ext2-devel@lists.sourceforge.net>,
	Andrew Morton <akpm@osdl.org>,
	"Stephen C. Tweedie" <sct@redhat.com>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	linux-fsdevel@vger.kernel.org
Cc: Badari Pulavarty <pbadari@us.ibm.com>, suparna@in.ibm.com, tytso@mit.edu
Subject: [RFC] [PATCH 3/4]generic getblocks() support in mpage_writepages
Date: Sun, 17 Jul 2005 10:40:55 -0700	[thread overview]
Message-ID: <1121622055.4609.26.camel@localhost.localdomain> (raw)
In-Reply-To: <1110839154.24286.302.camel@dyn318077bld.beaverton.ibm.com>

Updated patch from Suparna adding generic support for clustering pages
together in mpage_writepages() to make use of getblocks().

---

 linux-2.6.12-ming/fs/buffer.c                 |   49 -----
 linux-2.6.12-ming/fs/ext2/inode.c             |   15 -
 linux-2.6.12-ming/fs/ext3/inode.c             |   15 +
 linux-2.6.12-ming/fs/ext3/super.c             |    3 
 linux-2.6.12-ming/fs/hfs/inode.c              |    2 
 linux-2.6.12-ming/fs/hfsplus/inode.c          |    2 
 linux-2.6.12-ming/fs/jfs/inode.c              |   24 ++
 linux-2.6.12-ming/fs/mpage.c                  |  214 ++++++++++++++++++--------
 linux-2.6.12-ming/include/linux/buffer_head.h |    4 
 linux-2.6.12-ming/include/linux/fs.h          |    2 
 linux-2.6.12-ming/include/linux/mpage.h       |   11 -
 linux-2.6.12-ming/include/linux/pagemap.h     |    3 
 linux-2.6.12-ming/include/linux/pagevec.h     |    3 
 linux-2.6.12-ming/include/linux/radix-tree.h  |   14 +
 linux-2.6.12-ming/lib/radix-tree.c            |   25 ++-
 linux-2.6.12-ming/mm/filemap.c                |    9 -
 linux-2.6.12-ming/mm/swap.c                   |   11 +
 17 files changed, 270 insertions(+), 136 deletions(-)

diff -puN fs/buffer.c~mpage_writepages_getblocks fs/buffer.c
--- linux-2.6.12/fs/buffer.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/buffer.c	2005-07-15 00:11:01.000000000 -0700
@@ -2509,53 +2509,10 @@ EXPORT_SYMBOL(nobh_commit_write);
  * that it tries to operate without attaching bufferheads to
  * the page.
  */
-int nobh_writepage(struct page *page, get_block_t *get_block,
-			struct writeback_control *wbc)
+int nobh_writepage(struct page *page, get_blocks_t *get_blocks,
+		struct writeback_control *wbc, writepage_t bh_writepage_fn)
 {
-	struct inode * const inode = page->mapping->host;
-	loff_t i_size = i_size_read(inode);
-	const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
-	unsigned offset;
-	void *kaddr;
-	int ret;
-
-	/* Is the page fully inside i_size? */
-	if (page->index < end_index)
-		goto out;
-
-	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = i_size & (PAGE_CACHE_SIZE-1);
-	if (page->index >= end_index+1 || !offset) {
-		/*
-		 * The page may have dirty, unmapped buffers.  For example,
-		 * they may have been added in ext3_writepage().  Make them
-		 * freeable here, so the page does not leak.
-		 */
-#if 0
-		/* Not really sure about this  - do we need this ? */
-		if (page->mapping->a_ops->invalidatepage)
-			page->mapping->a_ops->invalidatepage(page, offset);
-#endif
-		unlock_page(page);
-		return 0; /* don't care */
-	}
-
-	/*
-	 * The page straddles i_size.  It must be zeroed out on each and every
-	 * writepage invocation because it may be mmapped.  "A file is mapped
-	 * in multiples of the page size.  For a file that is not a multiple of
-	 * the  page size, the remaining memory is zeroed when mapped, and
-	 * writes to that region are not written out to the file."
-	 */
-	kaddr = kmap_atomic(page, KM_USER0);
-	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
-	flush_dcache_page(page);
-	kunmap_atomic(kaddr, KM_USER0);
-out:
-	ret = mpage_writepage(page, get_block, wbc);
-	if (ret == -EAGAIN)
-		ret = __block_write_full_page(inode, page, get_block, wbc);
-	return ret;
+	return mpage_writepage(page, get_blocks, wbc, bh_writepage_fn);
 }
 EXPORT_SYMBOL(nobh_writepage);
 
diff -puN fs/ext2/inode.c~mpage_writepages_getblocks fs/ext2/inode.c
--- linux-2.6.12/fs/ext2/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/ext2/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -650,12 +650,6 @@ ext2_nobh_prepare_write(struct file *fil
 	return nobh_prepare_write(page,from,to,ext2_get_block);
 }
 
-static int ext2_nobh_writepage(struct page *page,
-			struct writeback_control *wbc)
-{
-	return nobh_writepage(page, ext2_get_block, wbc);
-}
-
 static sector_t ext2_bmap(struct address_space *mapping, sector_t block)
 {
 	return generic_block_bmap(mapping,block,ext2_get_block);
@@ -673,6 +667,12 @@ ext2_get_blocks(struct inode *inode, sec
 	return ret;
 }
 
+static int ext2_nobh_writepage(struct page *page,
+			struct writeback_control *wbc)
+{
+	return nobh_writepage(page, ext2_get_blocks, wbc, ext2_writepage);
+}
+
 static ssize_t
 ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 			loff_t offset, unsigned long nr_segs)
@@ -687,7 +687,8 @@ ext2_direct_IO(int rw, struct kiocb *ioc
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, ext2_get_block);
+        return __mpage_writepages(mapping, wbc, ext2_get_blocks,
+					ext2_writepage);
 }
 
 struct address_space_operations ext2_aops = {
diff -puN fs/ext3/super.c~mpage_writepages_getblocks fs/ext3/super.c
--- linux-2.6.12/fs/ext3/super.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/ext3/super.c	2005-07-15 00:11:01.000000000 -0700
@@ -1353,6 +1353,7 @@ static int ext3_fill_super (struct super
 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
 
 	set_opt(sbi->s_mount_opt, RESERVATION);
+	set_opt(sbi->s_mount_opt, NOBH); /* temp: set nobh default */
 
 	if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
 		goto failed_mount;
@@ -1599,6 +1600,7 @@ static int ext3_fill_super (struct super
 			printk(KERN_ERR "EXT3-fs: Journal does not support "
 			       "requested data journaling mode\n");
 			goto failed_mount3;
+		set_opt(sbi->s_mount_opt, NOBH); /* temp: set nobh default */
 		}
 	default:
 		break;
@@ -1616,6 +1618,7 @@ static int ext3_fill_super (struct super
 				"its supported only with writeback mode\n");
 			clear_opt(sbi->s_mount_opt, NOBH);
 		}
+		printk("NOBH option set\n");
 	}
 	if (test_opt(sb, DELAYED_ALLOC)) {
 		if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
diff -puN fs/ext3/inode.c~mpage_writepages_getblocks fs/ext3/inode.c
--- linux-2.6.12/fs/ext3/inode.c~mpage_writepages_getblocks	2005-07-15 17:32:05.865000480 -0700
+++ linux-2.6.12-ming/fs/ext3/inode.c	2005-07-15 18:06:49.384257408 -0700
@@ -1195,6 +1195,11 @@ get_block:
 }
 
 
+static int ext3_writepages_get_blocks(struct inode *inode, sector_t iblock,
+		unsigned long max_blocks, struct buffer_head *bh, int create)
+{
+	return ext3_direct_io_get_blocks(inode, iblock, max_blocks, bh, create);
+}
 /*
  * `handle' can be NULL if create is zero
  */
@@ -1674,6 +1679,13 @@ out_fail:
 	return ret;
 }
 
+static int
+ext3_writeback_writepage_helper(struct page *page,
+				struct writeback_control *wbc)
+{
+	return block_write_full_page(page, ext3_get_block, wbc);
+}
+
 static int ext3_writeback_writepage(struct page *page,
 				struct writeback_control *wbc)
 {
@@ -1692,7 +1704,8 @@ static int ext3_writeback_writepage(stru
 	}
 
 	if (test_opt(inode->i_sb, NOBH))
-		ret = nobh_writepage(page, ext3_get_block, wbc);
+		ret = nobh_writepage(page, ext3_writepages_get_blocks, wbc,
+			ext3_writeback_writepage_helper);
 	else
 		ret = block_write_full_page(page, ext3_get_block, wbc);
 
diff -puN fs/hfs/inode.c~mpage_writepages_getblocks fs/hfs/inode.c
--- linux-2.6.12/fs/hfs/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/hfs/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -124,7 +124,7 @@ static ssize_t hfs_direct_IO(int rw, str
 static int hfs_writepages(struct address_space *mapping,
 			  struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, hfs_get_block);
+	return mpage_writepages(mapping, wbc, hfs_get_blocks);
 }
 
 struct address_space_operations hfs_btree_aops = {
diff -puN fs/hfsplus/inode.c~mpage_writepages_getblocks fs/hfsplus/inode.c
--- linux-2.6.12/fs/hfsplus/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/hfsplus/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -121,7 +121,7 @@ static ssize_t hfsplus_direct_IO(int rw,
 static int hfsplus_writepages(struct address_space *mapping,
 			      struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, hfsplus_get_block);
+	return mpage_writepages(mapping, wbc, hfsplus_get_blocks);
 }
 
 struct address_space_operations hfsplus_btree_aops = {
diff -puN fs/jfs/inode.c~mpage_writepages_getblocks fs/jfs/inode.c
--- linux-2.6.12/fs/jfs/inode.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/jfs/inode.c	2005-07-15 00:11:01.000000000 -0700
@@ -249,21 +249,41 @@ jfs_get_blocks(struct inode *ip, sector_
 	return rc;
 }
 
+static int
+jfs_mpage_get_blocks(struct inode *ip, sector_t lblock, unsigned long
+			max_blocks, struct buffer_head *bh_result, int create)
+{
+	/*
+	 * fixme: temporary workaround: return one block at a time until
+	 * we figure out why we see exposures with truncate on
+	 * allocating multiple blocks in one shot.
+	 */
+	return jfs_get_blocks(ip, lblock, 1, bh_result, create);
+}
+
 static int jfs_get_block(struct inode *ip, sector_t lblock,
 			 struct buffer_head *bh_result, int create)
 {
 	return jfs_get_blocks(ip, lblock, 1, bh_result, create);
 }
 
+static int jfs_bh_writepage(struct page *page,
+				struct writeback_control *wbc)
+{
+	return block_write_full_page(page, jfs_get_block, wbc);
+}
+
+
 static int jfs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	return nobh_writepage(page, jfs_get_block, wbc);
+	return nobh_writepage(page, jfs_mpage_get_blocks, wbc, jfs_bh_writepage);
 }
 
 static int jfs_writepages(struct address_space *mapping,
 			struct writeback_control *wbc)
 {
-	return mpage_writepages(mapping, wbc, jfs_get_block);
+        return __mpage_writepages(mapping, wbc, jfs_mpage_get_blocks,
+					jfs_bh_writepage);
 }
 
 static int jfs_readpage(struct file *file, struct page *page)
diff -puN fs/mpage.c~mpage_writepages_getblocks fs/mpage.c
--- linux-2.6.12/fs/mpage.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/fs/mpage.c	2005-07-15 18:06:49.397255432 -0700
@@ -373,6 +373,67 @@ int mpage_readpage(struct page *page, ge
 }
 EXPORT_SYMBOL(mpage_readpage);
 
+struct mpageio {
+	struct bio *bio;
+	struct buffer_head map_bh;
+	unsigned long block_in_file;
+	unsigned long final_block_in_request;
+	sector_t block_in_bio;
+	int boundary;
+	sector_t boundary_block;
+	struct block_device *boundary_bdev;
+};
+
+/*
+ * Maps as many contiguous disk blocks as it can within the range of
+ * the request, and returns the total number of contiguous mapped
+ * blocks in the mpageio.
+ */
+static unsigned long mpage_get_more_blocks(struct mpageio *mio,
+	struct inode *inode, get_blocks_t get_blocks)
+{
+	struct buffer_head map_bh = {.b_state = 0};
+	unsigned long mio_nblocks = mio->map_bh.b_size >> inode->i_blkbits;
+	unsigned long first_unmapped = mio->block_in_file + mio_nblocks;
+	unsigned long next_contig_block = mio->map_bh.b_blocknr + mio_nblocks;
+
+	while ((first_unmapped < mio->final_block_in_request) &&
+		(mio->map_bh.b_size < PAGE_SIZE)) {
+
+		if (get_blocks(inode, first_unmapped,
+			mio->final_block_in_request - first_unmapped,
+			&map_bh, 1))
+			break;
+		if (mio_nblocks && ((map_bh.b_blocknr != next_contig_block) ||
+			map_bh.b_bdev != mio->map_bh.b_bdev))
+			break;
+
+		if (buffer_new(&map_bh)) {
+			int i = 0;
+			for (; i < map_bh.b_size >> inode->i_blkbits; i++)
+				unmap_underlying_metadata(map_bh.b_bdev,
+					map_bh.b_blocknr + i);
+		}
+
+		if (buffer_boundary(&map_bh)) {
+			mio->boundary = 1;
+			mio->boundary_block = map_bh.b_blocknr;
+			mio->boundary_bdev = map_bh.b_bdev;
+		}
+		if (mio_nblocks == 0) {
+			mio->map_bh.b_bdev = map_bh.b_bdev;
+			mio->map_bh.b_blocknr = map_bh.b_blocknr;
+		}
+
+		mio_nblocks += map_bh.b_size >> inode->i_blkbits;
+		first_unmapped = mio->block_in_file + mio_nblocks;
+		next_contig_block = mio->map_bh.b_blocknr + mio_nblocks;
+		mio->map_bh.b_size += map_bh.b_size;
+	}
+
+	return mio_nblocks;
+}
+
 /*
  * Writing is not so simple.
  *
@@ -389,9 +450,9 @@ EXPORT_SYMBOL(mpage_readpage);
  * written, so it can intelligently allocate a suitably-sized BIO.  For now,
  * just allocate full-size (16-page) BIOs.
  */
-static struct bio *
-__mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
-	sector_t *last_block_in_bio, int *ret, struct writeback_control *wbc,
+static int
+__mpage_writepage(struct mpageio *mio, struct page *page,
+	get_blocks_t get_blocks, struct writeback_control *wbc,
 	writepage_t writepage_fn)
 {
 	struct address_space *mapping = page->mapping;
@@ -399,9 +460,8 @@ __mpage_writepage(struct bio *bio, struc
 	const unsigned blkbits = inode->i_blkbits;
 	unsigned long end_index;
 	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
-	sector_t last_block;
+	sector_t last_block, blocks_to_skip;
 	sector_t block_in_file;
-	sector_t blocks[MAX_BUF_PER_PAGE];
 	unsigned page_block;
 	unsigned first_unmapped = blocks_per_page;
 	struct block_device *bdev = NULL;
@@ -409,8 +469,10 @@ __mpage_writepage(struct bio *bio, struc
 	sector_t boundary_block = 0;
 	struct block_device *boundary_bdev = NULL;
 	int length;
-	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
+	struct buffer_head *map_bh = &mio->map_bh;
+	struct bio *bio = mio->bio;
+	int ret = 0;
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -438,10 +500,13 @@ __mpage_writepage(struct bio *bio, struc
 			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
 				goto confused;
 			if (page_block) {
-				if (bh->b_blocknr != blocks[page_block-1] + 1)
+				if (bh->b_blocknr != map_bh->b_blocknr
+					+ page_block)
 					goto confused;
+			} else {
+				map_bh->b_blocknr = bh->b_blocknr;
+				map_bh->b_size = PAGE_SIZE;
 			}
-			blocks[page_block++] = bh->b_blocknr;
 			boundary = buffer_boundary(bh);
 			if (boundary) {
 				boundary_block = bh->b_blocknr;
@@ -468,33 +533,30 @@ __mpage_writepage(struct bio *bio, struc
 	BUG_ON(!PageUptodate(page));
 	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
 	last_block = (i_size - 1) >> blkbits;
-	map_bh.b_page = page;
-	for (page_block = 0; page_block < blocks_per_page; ) {
-
-		map_bh.b_state = 0;
-		if (get_block(inode, block_in_file, &map_bh, 1))
-			goto confused;
-		if (buffer_new(&map_bh))
-			unmap_underlying_metadata(map_bh.b_bdev,
-						map_bh.b_blocknr);
-		if (buffer_boundary(&map_bh)) {
-			boundary_block = map_bh.b_blocknr;
-			boundary_bdev = map_bh.b_bdev;
-		}
-		if (page_block) {
-			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
-				goto confused;
-		}
-		blocks[page_block++] = map_bh.b_blocknr;
-		boundary = buffer_boundary(&map_bh);
-		bdev = map_bh.b_bdev;
-		if (block_in_file == last_block)
-			break;
-		block_in_file++;
+	blocks_to_skip = block_in_file - mio->block_in_file;
+	mio->block_in_file = block_in_file;
+	if (blocks_to_skip < (map_bh->b_size >> blkbits)) {
+		map_bh->b_blocknr += blocks_to_skip;
+		map_bh->b_size -= blocks_to_skip << blkbits;
+	} else {
+		map_bh->b_state = 0;
+		map_bh->b_size = 0;
+		if (mio->final_block_in_request > last_block)
+			mio->final_block_in_request = last_block;
+		mpage_get_more_blocks(mio, inode, get_blocks);
 	}
-	BUG_ON(page_block == 0);
+	if (map_bh->b_size < PAGE_SIZE)
+		goto confused;
 
-	first_unmapped = page_block;
+	if (mio->boundary && (mio->boundary_block < map_bh->b_blocknr
+		+ blocks_per_page)) {
+		boundary = 1;
+		boundary_block = mio->boundary_block;
+		boundary_bdev = mio->boundary_bdev;
+	}
+
+	bdev = map_bh->b_bdev;
+	first_unmapped = blocks_per_page;
 
 page_is_mapped:
 	end_index = i_size >> PAGE_CACHE_SHIFT;
@@ -521,12 +583,16 @@ page_is_mapped:
 	/*
 	 * This page will go to BIO.  Do we need to send this BIO off first?
 	 */
-	if (bio && *last_block_in_bio != blocks[0] - 1)
+	if (bio && mio->block_in_bio != map_bh->b_blocknr - 1)
 		bio = mpage_bio_submit(WRITE, bio);
 
 alloc_new:
 	if (bio == NULL) {
-		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
+		/*
+		 * Fixme: bio size can be limited to final_block - block, or
+		 * even mio->map_bh.b_size
+		 */
+		bio = mpage_alloc(bdev, map_bh->b_blocknr << (blkbits - 9),
 				bio_get_nr_vecs(bdev), GFP_NOFS|__GFP_HIGH);
 		if (bio == NULL)
 			goto confused;
@@ -542,6 +608,9 @@ alloc_new:
 		bio = mpage_bio_submit(WRITE, bio);
 		goto alloc_new;
 	}
+	map_bh->b_blocknr += blocks_per_page;
+	map_bh->b_size -= PAGE_SIZE;
+	mio->block_in_file += blocks_per_page;
 
 	/*
 	 * OK, we have our BIO, so we can now mark the buffers clean.  Make
@@ -578,7 +647,8 @@ alloc_new:
 					boundary_block, 1 << blkbits);
 		}
 	} else {
-		*last_block_in_bio = blocks[blocks_per_page - 1];
+		/* we can pack more pages into the bio, don't submit yet */
+		mio->block_in_bio = map_bh->b_blocknr - 1;
 	}
 	goto out;
 
@@ -587,22 +657,23 @@ confused:
 		bio = mpage_bio_submit(WRITE, bio);
 
 	if (writepage_fn) {
-		*ret = (*writepage_fn)(page, wbc);
+		ret = (*writepage_fn)(page, wbc);
 	} else {
-		*ret = -EAGAIN;
+		ret = -EAGAIN;
 		goto out;
 	}
 	/*
 	 * The caller has a ref on the inode, so *mapping is stable
 	 */
-	if (*ret) {
-		if (*ret == -ENOSPC)
+	if (ret) {
+		if (ret == -ENOSPC)
 			set_bit(AS_ENOSPC, &mapping->flags);
 		else
 			set_bit(AS_EIO, &mapping->flags);
 	}
 out:
-	return bio;
+	mio->bio = bio;
+	return ret;
 }
 
 /**
@@ -628,11 +699,21 @@ out:
  */
 int
 mpage_writepages(struct address_space *mapping,
-		struct writeback_control *wbc, get_block_t get_block)
+		struct writeback_control *wbc, get_blocks_t get_blocks)
+{
+	return __mpage_writepages(mapping, wbc, get_blocks,
+                mapping->a_ops->writepage);
+
+}
+int
+__mpage_writepages(struct address_space *mapping,
+                struct writeback_control *wbc, get_blocks_t get_blocks,
+                writepage_t writepage_fn)
 {
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct bio *bio = NULL;
-	sector_t last_block_in_bio = 0;
+	struct inode *inode = mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
 	int ret = 0;
 	int done = 0;
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -642,6 +723,9 @@ mpage_writepages(struct address_space *m
 	pgoff_t end = -1;		/* Inclusive */
 	int scanned = 0;
 	int is_range = 0;
+	struct mpageio mio = {
+		.bio = NULL
+	};
 
 	if (wbc->nonblocking && bdi_write_congested(bdi)) {
 		wbc->encountered_congestion = 1;
@@ -649,7 +733,7 @@ mpage_writepages(struct address_space *m
 	}
 
 	writepage = NULL;
-	if (get_block == NULL)
+	if (get_blocks == NULL)
 		writepage = mapping->a_ops->writepage;
 
 	pagevec_init(&pvec, 0);
@@ -666,12 +750,15 @@ mpage_writepages(struct address_space *m
 		scanned = 1;
 	}
 retry:
+	down_read(&inode->i_alloc_sem);
 	while (!done && (index <= end) &&
-			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_DIRTY,
+			(nr_pages = pagevec_contig_lookup_tag(&pvec, mapping,
+			&index, PAGECACHE_TAG_DIRTY,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) {
 		unsigned i;
 
+		mio.final_block_in_request = min(index, end) <<
+			(PAGE_CACHE_SHIFT - blkbits);
 		scanned = 1;
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -696,7 +783,7 @@ retry:
 				unlock_page(page);
 				continue;
 			}
-
+
 			if (wbc->sync_mode != WB_SYNC_NONE)
 				wait_on_page_writeback(page);
 
@@ -717,9 +804,9 @@ retry:
 							&mapping->flags);
 				}
 			} else {
-				bio = __mpage_writepage(bio, page, get_block,
-						&last_block_in_bio, &ret, wbc,
-						page->mapping->a_ops->writepage);
+				ret = __mpage_writepage(&mio, page, get_blocks,
+						wbc, writepage_fn);
+				bio = mio.bio;
 			}
 			if (unlikely(ret == WRITEPAGE_ACTIVATE))
 				unlock_page(page);
@@ -733,6 +820,9 @@ retry:
 		pagevec_release(&pvec);
 		cond_resched();
 	}
+
+	up_read(&inode->i_alloc_sem);
+
 	if (!scanned && !done) {
 		/*
 		 * We hit the last page and there is more work to be done: wrap
@@ -749,18 +839,24 @@ retry:
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepages);
+EXPORT_SYMBOL(__mpage_writepages);
 
-int mpage_writepage(struct page *page, get_block_t get_block,
-	struct writeback_control *wbc)
+int mpage_writepage(struct page *page, get_blocks_t get_blocks,
+		struct writeback_control *wbc, writepage_t writepage_fn)
 {
 	int ret = 0;
-	struct bio *bio;
-	sector_t last_block_in_bio = 0;
-
-	bio = __mpage_writepage(NULL, page, get_block,
-			&last_block_in_bio, &ret, wbc, NULL);
-	if (bio)
-		mpage_bio_submit(WRITE, bio);
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	const unsigned blkbits = inode->i_blkbits;
+	struct mpageio mio = {
+		.final_block_in_request = (page->index + 1) << (PAGE_CACHE_SHIFT
+			- blkbits)
+	};
+
+	ret = __mpage_writepage(&mio, page, get_blocks,
+			wbc, writepage_fn);
+	if (mio.bio)
+		mpage_bio_submit(WRITE, mio.bio);
 
 	return ret;
 }
diff -puN include/linux/buffer_head.h~mpage_writepages_getblocks include/linux/buffer_head.h
--- linux-2.6.12/include/linux/buffer_head.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/buffer_head.h	2005-07-15 00:11:01.000000000 -0700
@@ -206,8 +206,8 @@ int file_fsync(struct file *, struct den
 int nobh_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
 int nobh_commit_write(struct file *, struct page *, unsigned, unsigned);
 int nobh_truncate_page(struct address_space *, loff_t);
-int nobh_writepage(struct page *page, get_block_t *get_block,
-                        struct writeback_control *wbc);
+int nobh_writepage(struct page *page, get_blocks_t *get_blocks,
+	struct writeback_control *wbc, writepage_t bh_writepage);
 
 
 /*
diff -puN include/linux/fs.h~mpage_writepages_getblocks include/linux/fs.h
--- linux-2.6.12/include/linux/fs.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/fs.h	2005-07-15 00:11:01.000000000 -0700
@@ -305,6 +305,8 @@ struct page;
 struct address_space;
 struct writeback_control;
 
+typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
+
 struct address_space_operations {
 	int (*writepage)(struct page *page, struct writeback_control *wbc);
 	int (*readpage)(struct file *, struct page *);
diff -puN include/linux/mpage.h~mpage_writepages_getblocks include/linux/mpage.h
--- linux-2.6.12/include/linux/mpage.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/mpage.h	2005-07-15 18:06:49.398255280 -0700
@@ -11,15 +11,18 @@
  */
 
 struct writeback_control;
-typedef int (writepage_t)(struct page *page, struct writeback_control *wbc);
 
 int mpage_readpages(struct address_space *mapping, struct list_head *pages,
 				unsigned nr_pages, get_block_t get_block);
 int mpage_readpage(struct page *page, get_block_t get_block);
+
 int mpage_writepages(struct address_space *mapping,
-		struct writeback_control *wbc, get_block_t get_block);
-int mpage_writepage(struct page *page, get_block_t *get_block,
-		struct writeback_control *wbc);
+                struct writeback_control *wbc, get_blocks_t get_blocks);
+int mpage_writepage(struct page *page, get_blocks_t *get_blocks,
+                struct writeback_control *wbc, writepage_t writepage);
+int __mpage_writepages(struct address_space *mapping,
+                struct writeback_control *wbc, get_blocks_t get_blocks,
+                writepage_t writepage);
 
 static inline int
 generic_writepages(struct address_space *mapping, struct writeback_control *wbc)
diff -puN include/linux/pagemap.h~mpage_writepages_getblocks include/linux/pagemap.h
--- linux-2.6.12/include/linux/pagemap.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/pagemap.h	2005-07-15 00:11:01.000000000 -0700
@@ -73,7 +73,8 @@ extern struct page * find_or_create_page
 unsigned find_get_pages(struct address_space *mapping, pgoff_t start,
 			unsigned int nr_pages, struct page **pages);
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages);
+			int tag, unsigned int nr_pages, struct page **pages,
+			int contig);
 
 /*
  * Returns locked page at given index in given cache, creating it if needed.
diff -puN include/linux/pagevec.h~mpage_writepages_getblocks include/linux/pagevec.h
--- linux-2.6.12/include/linux/pagevec.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/pagevec.h	2005-07-15 00:11:01.000000000 -0700
@@ -28,6 +28,9 @@ unsigned pagevec_lookup(struct pagevec *
 unsigned pagevec_lookup_tag(struct pagevec *pvec,
 		struct address_space *mapping, pgoff_t *index, int tag,
 		unsigned nr_pages);
+unsigned pagevec_contig_lookup_tag(struct pagevec *pvec,
+		struct address_space *mapping, pgoff_t *index, int tag,
+		unsigned nr_pages);
 
 static inline void pagevec_init(struct pagevec *pvec, int cold)
 {
diff -puN include/linux/radix-tree.h~mpage_writepages_getblocks include/linux/radix-tree.h
--- linux-2.6.12/include/linux/radix-tree.h~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/include/linux/radix-tree.h	2005-07-15 00:11:01.000000000 -0700
@@ -59,8 +59,18 @@ void *radix_tree_tag_clear(struct radix_
 int radix_tree_tag_get(struct radix_tree_root *root,
 			unsigned long index, int tag);
 unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
-		unsigned long first_index, unsigned int max_items, int tag);
+__radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+		unsigned long first_index, unsigned int max_items, int tag,
+		int contig);
+
+static inline unsigned int radix_tree_gang_lookup_tag(struct radix_tree_root
+		*root, void **results, unsigned long first_index,
+		unsigned int max_items, int tag)
+{
+	return __radix_tree_gang_lookup_tag(root, results, first_index,
+		max_items, tag, 0);
+}
+
 int radix_tree_tagged(struct radix_tree_root *root, int tag);
 
 static inline void radix_tree_preload_end(void)
diff -puN lib/radix-tree.c~mpage_writepages_getblocks lib/radix-tree.c
--- linux-2.6.12/lib/radix-tree.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/lib/radix-tree.c	2005-07-15 00:11:01.000000000 -0700
@@ -557,12 +557,13 @@ EXPORT_SYMBOL(radix_tree_gang_lookup);
  */
 static unsigned int
 __lookup_tag(struct radix_tree_root *root, void **results, unsigned long index,
-	unsigned int max_items, unsigned long *next_index, int tag)
+	unsigned int max_items, unsigned long *next_index, int tag, int contig)
 {
 	unsigned int nr_found = 0;
 	unsigned int shift;
 	unsigned int height = root->height;
 	struct radix_tree_node *slot;
+	unsigned long cindex = (contig && (*next_index)) ? *next_index : -1;
 
 	shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
 	slot = root->rnode;
@@ -575,6 +576,11 @@ __lookup_tag(struct radix_tree_root *roo
 				BUG_ON(slot->slots[i] == NULL);
 				break;
 			}
+			if (contig && index >= cindex) {
+				/* break in contiguity */
+				index = 0;
+				goto out;
+			}
 			index &= ~((1UL << shift) - 1);
 			index += 1UL << shift;
 			if (index == 0)
@@ -593,6 +599,10 @@ __lookup_tag(struct radix_tree_root *roo
 					results[nr_found++] = slot->slots[j];
 					if (nr_found == max_items)
 						goto out;
+				} else if (contig && nr_found) {
+					/* break in contiguity */
+					index = 0;
+					goto out;
 				}
 			}
 		}
@@ -618,29 +628,32 @@ out:
  *	returns the number of items which were placed at *@results.
  */
 unsigned int
-radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
-		unsigned long first_index, unsigned int max_items, int tag)
+__radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
+		unsigned long first_index, unsigned int max_items, int tag,
+		int contig)
 {
 	const unsigned long max_index = radix_tree_maxindex(root->height);
 	unsigned long cur_index = first_index;
+	unsigned long next_index = 0;	/* Index of next contiguous search */
 	unsigned int ret = 0;
 
 	while (ret < max_items) {
 		unsigned int nr_found;
-		unsigned long next_index;	/* Index of next search */
 
 		if (cur_index > max_index)
 			break;
 		nr_found = __lookup_tag(root, results + ret, cur_index,
-					max_items - ret, &next_index, tag);
+				max_items - ret, &next_index, tag, contig);
 		ret += nr_found;
 		if (next_index == 0)
 			break;
 		cur_index = next_index;
+		if (!nr_found)
+			next_index = 0;
 	}
 	return ret;
 }
-EXPORT_SYMBOL(radix_tree_gang_lookup_tag);
+EXPORT_SYMBOL(__radix_tree_gang_lookup_tag);
 
 /**
  *	radix_tree_delete    -    delete an item from a radix tree
diff -puN mm/filemap.c~mpage_writepages_getblocks mm/filemap.c
--- linux-2.6.12/mm/filemap.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/mm/filemap.c	2005-07-15 00:11:01.000000000 -0700
@@ -649,16 +649,19 @@ unsigned find_get_pages(struct address_s
 /*
  * Like find_get_pages, except we only return pages which are tagged with
  * `tag'.   We update *index to index the next page for the traversal.
+ * If 'contig' is 1, then we return only pages which are contiguous in the
+ * file.
  */
 unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index,
-			int tag, unsigned int nr_pages, struct page **pages)
+			int tag, unsigned int nr_pages, struct page **pages,
+			int contig)
 {
 	unsigned int i;
 	unsigned int ret;
 
 	read_lock_irq(&mapping->tree_lock);
-	ret = radix_tree_gang_lookup_tag(&mapping->page_tree,
-				(void **)pages, *index, nr_pages, tag);
+	ret = __radix_tree_gang_lookup_tag(&mapping->page_tree,
+			(void **)pages, *index, nr_pages, tag, contig);
 	for (i = 0; i < ret; i++)
 		page_cache_get(pages[i]);
 	if (ret)
diff -puN mm/swap.c~mpage_writepages_getblocks mm/swap.c
--- linux-2.6.12/mm/swap.c~mpage_writepages_getblocks	2005-07-15 00:11:01.000000000 -0700
+++ linux-2.6.12-ming/mm/swap.c	2005-07-15 00:11:01.000000000 -0700
@@ -384,7 +384,16 @@ unsigned pagevec_lookup_tag(struct pagev
 		pgoff_t *index, int tag, unsigned nr_pages)
 {
 	pvec->nr = find_get_pages_tag(mapping, index, tag,
-					nr_pages, pvec->pages);
+					nr_pages, pvec->pages, 0);
+	return pagevec_count(pvec);
+}
+
+unsigned int
+pagevec_contig_lookup_tag(struct pagevec *pvec, struct address_space *mapping,
+		pgoff_t *index, int tag, unsigned nr_pages)
+{
+	pvec->nr = find_get_pages_tag(mapping, index, tag,
+					nr_pages, pvec->pages, 1);
 	return pagevec_count(pvec);
 }
 

_




-------------------------------------------------------
SF.Net email is sponsored by: Discover Easy Linux Migration Strategies
from IBM. Find simple to follow Roadmaps, straightforward articles,
informative Webcasts and more! Get everything you need to get up to
speed, fast. http://ads.osdn.com/?ad_id=7477&alloc_id=16492&op=click

  parent reply	other threads:[~2005-07-17 17:44 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <1110839154.24286.302.camel@dyn318077bld.beaverton.ibm.com>
2005-07-17 17:40 ` [RFC] [PATCH 0/4]Multiple block allocation and delayed allocation for ext3 Mingming Cao
2005-07-17 17:45   ` Mingming Cao
2005-07-17 17:45     ` Mingming Cao
2005-07-17 17:40 ` [RFC] [PATCH 1/4]Multiple block " Mingming Cao
2005-07-17 17:40   ` Mingming Cao
2005-07-17 17:40 ` [RFC] [PATCH 2/4]delayed " Mingming Cao
2005-07-18  1:47   ` [Ext2-devel] " Andreas Dilger
2005-07-18 17:32     ` Mingming Cao
2005-07-19  0:25     ` Badari Pulavarty
2005-07-26 22:52   ` Andrew Morton
2005-07-26 22:52     ` Andrew Morton
2005-07-26 22:55     ` Badari Pulavarty
2005-07-26 22:55       ` Badari Pulavarty
2005-07-17 17:40 ` Mingming Cao [this message]
2005-07-17 17:40   ` [RFC] [PATCH 3/4]generic getblocks() support in mpage_writepages Mingming Cao
2005-07-17 17:41 ` [RFC] [PATCH 4/4]add ext3 writeback writpages Mingming Cao
2005-07-17 17:41   ` Mingming Cao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1121622055.4609.26.camel@localhost.localdomain \
    --to=cmm@us.ibm.com \
    --cc=akpm@osdl.org \
    --cc=ext2-devel@lists.sourceforge.net \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbadari@us.ibm.com \
    --cc=sct@redhat.com \
    --cc=suparna@in.ibm.com \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.