Re: [patch 10/19] direct-to-BIO I/O for swapcache pages

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Andrew Morton <akpm@zip.com.au>
To: lkml <linux-kernel@vger.kernel.org>
Subject: Re: [patch 10/19] direct-to-BIO I/O for swapcache pages
Date: Mon, 17 Jun 2002 00:13:31 -0700	[thread overview]
Message-ID: <3D0D8C1B.D14F5171@zip.com.au> (raw)
In-Reply-To: 3D0D873A.405ED0BB@zip.com.au

Andrew Morton wrote:
> 
> ..
> I have an
> additional patch which converts swap to use mpage_writepages(), so we swap
> out in 16-page BIOs.  It works fine, but I don't intend to submit that.
> There just doesn't seem to be any significant advantage to it.
> 

Just for the record, here is the patch which converts swap writeout to
use large BIOs (via mpage_writepages):


--- 2.5.21/fs/buffer.c~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/fs/buffer.c	Sat Jun 15 17:15:02 2002
@@ -397,7 +397,7 @@ __get_hash_table(struct block_device *bd
 	struct buffer_head *head;
 	struct page *page;
 
-	index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
+	index = block >> (mapping_page_shift(bd_mapping) - bd_inode->i_blkbits);
 	page = find_get_page(bd_mapping, index);
 	if (!page)
 		goto out;
@@ -1667,7 +1667,7 @@ static int __block_write_full_page(struc
 	 * handle that here by just cleaning them.
 	 */
 
-	block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	block = page->index << (page_shift(page) - inode->i_blkbits);
 	head = page_buffers(page);
 	bh = head;
 
@@ -1811,8 +1811,8 @@ static int __block_prepare_write(struct 
 	char *kaddr = kmap(page);
 
 	BUG_ON(!PageLocked(page));
-	BUG_ON(from > PAGE_CACHE_SIZE);
-	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from > page_size(page));
+	BUG_ON(to > page_size(page));
 	BUG_ON(from > to);
 
 	blocksize = 1 << inode->i_blkbits;
@@ -1821,7 +1821,7 @@ static int __block_prepare_write(struct 
 	head = page_buffers(page);
 
 	bbits = inode->i_blkbits;
-	block = page->index << (PAGE_CACHE_SHIFT - bbits);
+	block = page->index << (page_shift(page) - bbits);
 
 	for(bh = head, block_start = 0; bh != head || !block_start;
 	    block++, block_start=block_end, bh = bh->b_this_page) {
@@ -1966,8 +1966,8 @@ int block_read_full_page(struct page *pa
 		create_empty_buffers(page, blocksize, 0);
 	head = page_buffers(page);
 
-	blocks = PAGE_CACHE_SIZE >> inode->i_blkbits;
-	iblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	blocks = page_size(page) >> inode->i_blkbits;
+	iblock = page->index << (page_shift(page) - inode->i_blkbits);
 	lblock = (inode->i_size+blocksize-1) >> inode->i_blkbits;
 	bh = head;
 	nr = 0;
@@ -2054,7 +2054,7 @@ int generic_cont_expand(struct inode *in
 	if (size > inode->i_sb->s_maxbytes)
 		goto out;
 
-	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
+	offset = (size & (mapping_page_size(mapping) - 1)); /* Within page */
 
 	/* ugh.  in prepare/commit_write, if from==to==start of block, we 
 	** skip the prepare.  make sure we never send an offset for the start
@@ -2063,7 +2063,7 @@ int generic_cont_expand(struct inode *in
 	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
 		offset++;
 	}
-	index = size >> PAGE_CACHE_SHIFT;
+	index = size >> mapping_page_shift(mapping);
 	err = -ENOMEM;
 	page = grab_cache_page(mapping, index);
 	if (!page)
@@ -2097,31 +2097,31 @@ int cont_prepare_write(struct page *page
 	unsigned blocksize = 1 << inode->i_blkbits;
 	char *kaddr;
 
-	while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
+	while(page->index > (pgpos = *bytes>>page_shift(page))) {
 		status = -ENOMEM;
 		new_page = grab_cache_page(mapping, pgpos);
 		if (!new_page)
 			goto out;
 		/* we might sleep */
-		if (*bytes>>PAGE_CACHE_SHIFT != pgpos) {
+		if (*bytes>>page_shift(page) != pgpos) {
 			unlock_page(new_page);
 			page_cache_release(new_page);
 			continue;
 		}
-		zerofrom = *bytes & ~PAGE_CACHE_MASK;
+		zerofrom = *bytes & ~page_mask(page);
 		if (zerofrom & (blocksize-1)) {
 			*bytes |= (blocksize-1);
 			(*bytes)++;
 		}
 		status = __block_prepare_write(inode, new_page, zerofrom,
-						PAGE_CACHE_SIZE, get_block);
+						page_size(new_page), get_block);
 		if (status)
 			goto out_unmap;
 		kaddr = page_address(new_page);
-		memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
+		memset(kaddr+zerofrom, 0, page_size(new_page)-zerofrom);
 		flush_dcache_page(new_page);
 		__block_commit_write(inode, new_page,
-				zerofrom, PAGE_CACHE_SIZE);
+				zerofrom, page_size(new_page));
 		kunmap(new_page);
 		unlock_page(new_page);
 		page_cache_release(new_page);
@@ -2132,7 +2132,7 @@ int cont_prepare_write(struct page *page
 		zerofrom = offset;
 	} else {
 		/* page covers the boundary, find the boundary offset */
-		zerofrom = *bytes & ~PAGE_CACHE_MASK;
+		zerofrom = *bytes & ~page_mask(page);
 
 		/* if we will expand the thing last block will be filled */
 		if (to > zerofrom && (zerofrom & (blocksize-1))) {
@@ -2192,7 +2192,7 @@ int generic_commit_write(struct file *fi
 		unsigned from, unsigned to)
 {
 	struct inode *inode = page->mapping->host;
-	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+	loff_t pos = ((loff_t)page->index << page_shift(page)) + to;
 	__block_commit_write(inode,page,from,to);
 	kunmap(page);
 	if (pos > inode->i_size) {
@@ -2205,8 +2205,8 @@ int generic_commit_write(struct file *fi
 int block_truncate_page(struct address_space *mapping,
 			loff_t from, get_block_t *get_block)
 {
-	unsigned long index = from >> PAGE_CACHE_SHIFT;
-	unsigned offset = from & (PAGE_CACHE_SIZE-1);
+	unsigned long index = from >> mapping_page_shift(mapping);
+	unsigned offset = from & (mapping_page_size(mapping) - 1);
 	unsigned blocksize, iblock, length, pos;
 	struct inode *inode = mapping->host;
 	struct page *page;
@@ -2221,7 +2221,7 @@ int block_truncate_page(struct address_s
 		return 0;
 
 	length = blocksize - length;
-	iblock = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	iblock = index << (mapping_page_shift(mapping) - inode->i_blkbits);
 	
 	page = grab_cache_page(mapping, index);
 	err = -ENOMEM;
@@ -2283,7 +2283,7 @@ out:
 int block_write_full_page(struct page *page, get_block_t *get_block)
 {
 	struct inode * const inode = page->mapping->host;
-	const unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	const unsigned long end_index = inode->i_size >> page_shift(page);
 	unsigned offset;
 	char *kaddr;
 
@@ -2292,7 +2292,7 @@ int block_write_full_page(struct page *p
 		return __block_write_full_page(inode, page, get_block);
 
 	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+	offset = inode->i_size & (page_size(page) - 1);
 	if (page->index >= end_index+1 || !offset) {
 		unlock_page(page);
 		return -EIO;
@@ -2300,7 +2300,7 @@ int block_write_full_page(struct page *p
 
 	/* The page straddles i_size */
 	kaddr = kmap(page);
-	memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+	memset(kaddr + offset, 0, page_size(page) - offset);
 	flush_dcache_page(page);
 	kunmap(page);
 	return __block_write_full_page(inode, page, get_block);
--- 2.5.21/fs/mpage.c~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/fs/mpage.c	Sat Jun 15 17:15:02 2002
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/bio.h>
 #include <linux/fs.h>
+#include <linux/pagemap.h>
 #include <linux/buffer_head.h>
 #include <linux/blkdev.h>
 #include <linux/highmem.h>
@@ -22,7 +23,7 @@
 
 /*
  * The largest-sized BIO which this code will assemble, in bytes.  Set this
- * to PAGE_CACHE_SIZE if your drivers are broken.
+ * to PAGE_SIZE_MAX if your drivers are broken.
  */
 #define MPAGE_BIO_MAX_SIZE BIO_MAX_SIZE
 
@@ -165,7 +166,7 @@ do_mpage_readpage(struct bio *bio, struc
 {
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocks_per_page = page_size(page) >> blkbits;
 	const unsigned blocksize = 1 << blkbits;
 	struct bio_vec *bvec;
 	sector_t block_in_file;
@@ -175,23 +176,24 @@ do_mpage_readpage(struct bio *bio, struc
 	unsigned page_block;
 	unsigned first_hole = blocks_per_page;
 	struct block_device *bdev = NULL;
-	struct buffer_head bh;
+	struct buffer_head map_bh;
 
 	if (page_has_buffers(page))
 		goto confused;
 
-	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = page->index << (page_shift(page) - blkbits);
 	last_file_block = (inode->i_size + blocksize - 1) >> blkbits;
+	map_bh.b_page = page;
 
 	for (page_block = 0; page_block < blocks_per_page;
 				page_block++, block_in_file++) {
-		bh.b_state = 0;
+		map_bh.b_state = 0;
 		if (block_in_file < last_file_block) {
-			if (get_block(inode, block_in_file, &bh, 0))
+			if (get_block(inode, block_in_file, &map_bh, 0))
 				goto confused;
 		}
 
-		if (!buffer_mapped(&bh)) {
+		if (!buffer_mapped(&map_bh)) {
 			if (first_hole == blocks_per_page)
 				first_hole = page_block;
 			continue;
@@ -202,18 +204,18 @@ do_mpage_readpage(struct bio *bio, struc
 
 		if (page_block) {
 			/* Contiguous blocks? */
-			if (bh.b_blocknr != last_page_block + 1)
+			if (map_bh.b_blocknr != last_page_block + 1)
 				goto confused;
 		} else {
-			first_page_block = bh.b_blocknr;
+			first_page_block = map_bh.b_blocknr;
 		}
-		last_page_block = bh.b_blocknr;
-		bdev = bh.b_bdev;
+		last_page_block = map_bh.b_blocknr;
+		bdev = map_bh.b_bdev;
 	}
 
 	if (first_hole != blocks_per_page) {
 		memset(kmap(page) + (first_hole << blkbits), 0,
-				PAGE_CACHE_SIZE - (first_hole << blkbits));
+				page_size(page) - (first_hole << blkbits));
 		flush_dcache_page(page);
 		kunmap(page);
 		if (first_hole == 0) {
@@ -231,7 +233,7 @@ do_mpage_readpage(struct bio *bio, struc
 		bio = mpage_bio_submit(READ, bio);
 
 	if (bio == NULL) {
-		unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE;
+		unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / page_size(page);
 
 		if (nr_bvecs > nr_pages)
 			nr_bvecs = nr_pages;
@@ -246,7 +248,7 @@ do_mpage_readpage(struct bio *bio, struc
 	bvec->bv_len = (first_hole << blkbits);
 	bvec->bv_offset = 0;
 	bio->bi_size += bvec->bv_len;
-	if (buffer_boundary(&bh) || (first_hole != blocks_per_page))
+	if (buffer_boundary(&map_bh) || (first_hole != blocks_per_page))
 		bio = mpage_bio_submit(READ, bio);
 	else
 		*last_block_in_bio = last_page_block;
@@ -324,7 +326,7 @@ mpage_writepage(struct bio *bio, struct 
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	unsigned long end_index;
-	const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+	const unsigned blocks_per_page = page_size(page) >> blkbits;
 	struct bio_vec *bvec;
 	sector_t last_file_block;
 	sector_t block_in_file;
@@ -387,13 +389,14 @@ mpage_writepage(struct bio *bio, struct 
 	 * The page has no buffers: map it to disk
 	 */
 	BUG_ON(!PageUptodate(page));
-	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
+	block_in_file = page->index << (page_shift(page) - blkbits);
 	last_file_block = (inode->i_size - 1) >> blkbits;
 	for (page_block = 0; page_block < blocks_per_page;
 				page_block++, block_in_file++) {
 		struct buffer_head map_bh;
 
 		map_bh.b_state = 0;
+		map_bh.b_page = page;
 		if (get_block(inode, block_in_file, &map_bh, 1))
 			goto confused;
 		if (buffer_new(&map_bh))
@@ -416,13 +419,13 @@ mpage_writepage(struct bio *bio, struct 
 
 	first_unmapped = page_block;
 
-	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	end_index = inode->i_size >> page_shift(page);
 	if (page->index >= end_index) {
-		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+		unsigned offset = inode->i_size & (page_size(page) - 1);
 
 		if (page->index > end_index || !offset)
 			goto confused;
-		memset(kmap(page) + offset, 0, PAGE_CACHE_SIZE - offset);
+		memset(kmap(page) + offset, 0, page_size(page) - offset);
 		flush_dcache_page(page);
 		kunmap(page);
 	}
@@ -431,13 +434,17 @@ page_is_mapped:
 
 	/*
 	 * This page will go to BIO.  Do we need to send this BIO off first?
+	 * Check for changed bdev - swapper_space striping does this.
 	 */
-	if (bio && (bio->bi_idx == bio->bi_vcnt ||
-				*last_block_in_bio != first_page_block - 1))
-		bio = mpage_bio_submit(WRITE, bio);
+	if (bio) {
+		if ((bio->bi_idx == bio->bi_vcnt) ||
+				(*last_block_in_bio != first_page_block - 1) ||
+				(bio->bi_bdev != bdev))
+			bio = mpage_bio_submit(WRITE, bio);
+	}
 
 	if (bio == NULL) {
-		unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / PAGE_CACHE_SIZE;
+		unsigned nr_bvecs = MPAGE_BIO_MAX_SIZE / page_size(page);
 
 		bio = mpage_alloc(bdev, first_page_block << (blkbits - 9),
 					nr_bvecs, GFP_NOFS);
--- 2.5.21/include/linux/pagemap.h~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/include/linux/pagemap.h	Sat Jun 15 17:15:02 2002
@@ -22,6 +22,12 @@
 #define PAGE_CACHE_MASK		PAGE_MASK
 #define PAGE_CACHE_ALIGN(addr)	(((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)
 
+#if PAGE_SIZE > PAGE_CACHE_SIZE
+#define PAGE_SIZE_MAX PAGE_SIZE
+#else
+#define PAGE_SIZE_MAX PAGE_CACHE_SIZE
+#endif
+
 #define page_cache_get(x)	get_page(x)
 extern void page_cache_release(struct page *);
 
@@ -97,5 +103,35 @@ static inline void wait_on_page_writebac
 		wait_on_page_bit(page, PG_writeback);
 }
 
+static inline unsigned mapping_page_size(struct address_space *mapping)
+{
+	return (mapping == &swapper_space) ? PAGE_SIZE : PAGE_CACHE_SIZE;
+}
+
+static inline unsigned mapping_page_shift(struct address_space *mapping)
+{
+	return (mapping == &swapper_space) ? PAGE_SHIFT : PAGE_CACHE_SHIFT;
+}
+
+static inline unsigned mapping_page_mask(struct address_space *mapping)
+{
+	return (mapping == &swapper_space) ? PAGE_MASK : PAGE_CACHE_MASK;
+}
+
+static inline unsigned page_size(struct page *page)
+{
+	return mapping_page_size(page->mapping);
+}
+
+static inline unsigned page_shift(struct page *page)
+{
+	return mapping_page_shift(page->mapping);
+}
+
+static inline unsigned page_mask(struct page *page)
+{
+	return mapping_page_mask(page->mapping);
+}
+
 extern void end_page_writeback(struct page *page);
 #endif /* _LINUX_PAGEMAP_H */
--- 2.5.21/mm/page_io.c~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/mm/page_io.c	Sat Jun 15 17:15:03 2002
@@ -15,6 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/bio.h>
+#include <linux/mpage.h>
 #include <linux/buffer_head.h>
 #include <asm/pgtable.h>
 #include <linux/swapops.h>
@@ -35,6 +36,32 @@ swap_get_block(struct inode *inode, sect
 	return 0;
 }
 
+/*
+ * swap_write_get_block() is for use by mpage_writepages().  If it sees a stale
+ * swapcache page (which doesn't need writing), swap_write_get_block() will
+ * return "failure".  This causes mpage_writepages() to send off its current
+ * BIO and to fall back to swap_writepage().  Which can simply unlock the page.
+ */
+static int
+swap_write_get_block(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
+{
+	if (remove_exclusive_swap_page(bh_result->b_page))
+		return -1;
+	return swap_get_block(inode, iblock, bh_result, create);
+}
+
+/*
+ * We may have stale swap cache pages in memory: notice them here and get
+ * rid of the unnecessary final write.
+ */
+static int swap_writepage(struct page *page)
+{
+	printk("swap_writepage\n");
+	unlock_page(page);
+	return 0;
+}
+
 static struct bio *
 get_swap_bio(int gfp_flags, struct page *page, bio_end_io_t end_io)
 {
@@ -57,17 +84,6 @@ get_swap_bio(int gfp_flags, struct page 
 	return bio;
 }
 
-static void end_swap_bio_write(struct bio *bio)
-{
-	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
-	struct page *page = bio->bi_io_vec[0].bv_page;
-
-	if (!uptodate)
-		SetPageError(page);
-	end_page_writeback(page);
-	bio_put(bio);
-}
-
 static void end_swap_bio_read(struct bio *bio)
 {
 	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -83,32 +99,6 @@ static void end_swap_bio_read(struct bio
 	bio_put(bio);
 }
 
-/*
- * We may have stale swap cache pages in memory: notice
- * them here and get rid of the unnecessary final write.
- */
-static int swap_writepage(struct page *page)
-{
-	struct bio *bio;
-	int ret = 0;
-
-	if (remove_exclusive_swap_page(page)) {
-		unlock_page(page);
-		goto out;
-	}
-	bio = get_swap_bio(GFP_NOFS, page, end_swap_bio_write);
-	if (bio == NULL) {
-		ret = -ENOMEM;
-		goto out;
-	}
-	kstat.pswpout++;
-	SetPageWriteback(page);
-	unlock_page(page);
-	submit_bio(WRITE, bio);
-out:
-	return ret;
-}
-
 int swap_readpage(struct file *file, struct page *page)
 {
 	struct bio *bio;
@@ -125,30 +115,75 @@ int swap_readpage(struct file *file, str
 out:
 	return ret;
 }
+
+static int swap_writepages(struct address_space *mapping, int *nr_to_write)
+{
+	int to_write = *nr_to_write;
+	int ret;
+
+	ret = mpage_writepages(mapping, nr_to_write, swap_write_get_block);
+	kstat.pswpout += to_write - *nr_to_write;
+	return ret;
+}
+
 /*
  * swapper_space doesn't have a real inode, so it gets a special vm_writeback()
  * so we don't need swap special cases in generic_vm_writeback().
  *
- * Swap pages are PageLocked and PageWriteback while under writeout so that
- * memory allocators will throttle against them.
+ * Swap pages are PageWriteback while under writeout so that memory allocators
+ * will throttle against them.
  */
 static int swap_vm_writeback(struct page *page, int *nr_to_write)
 {
-	struct address_space *mapping = page->mapping;
-
 	unlock_page(page);
-	return generic_writepages(mapping, nr_to_write);
+	return swap_writepages(page->mapping, nr_to_write);
 }
 
 struct address_space_operations swap_aops = {
 	vm_writeback:	swap_vm_writeback,
 	writepage:	swap_writepage,
+	writepages:	swap_writepages,
 	readpage:	swap_readpage,
 	sync_page:	block_sync_page,
 	set_page_dirty:	__set_page_dirty_nobuffers,
 };
 
 /*
+ * Primitive swap readahead code. We simply read an aligned block of
+ * (1 << page_cluster) entries in the swap area. This method is chosen
+ * because it doesn't cost us any seek time.  We also make sure to queue
+ * the 'original' request together with the readahead ones...
+ *
+ * Readahead is performed against a single device.  Which is perhaps suboptimal
+ * when striped swap is being used.  But given that swap uses a one meg chunk
+ * size for striping, chances are that readahead is reading the right pages.
+ *
+ * It would be possible to use mpage and the generic readahead code here.
+ * We'd have to clone mpage_readpages because add_to_swap_cache() does special
+ * things.  Doubtful if all this would help much, really.
+ */
+void swapin_readahead(swp_entry_t entry)
+{
+	int i, num;
+	unsigned long offset;
+
+	/*
+	 * Get the number of handles we should do readahead io to.
+	 */
+	num = valid_swaphandles(entry, &offset);
+	for (i = 0; i < num; offset++, i++) {
+		struct page *new_page;
+		swp_entry_t ra_entry;
+
+		ra_entry = swp_entry(swp_type(entry), offset);
+		new_page = read_swap_cache_async(ra_entry);
+		if (!new_page)
+			break;
+		page_cache_release(new_page);
+	}
+}
+
+/*
  * A scruffy utility function to read or write an arbitrary swap page
  * and wait on the I/O.
  */
--- 2.5.21/mm/swap_state.c~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/mm/swap_state.c	Sat Jun 15 17:15:03 2002
@@ -8,13 +8,11 @@
  */
 
 #include <linux/mm.h>
-#include <linux/kernel_stat.h>
 #include <linux/swap.h>
 #include <linux/swapctl.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
 #include <linux/smp_lock.h>
-#include <linux/buffer_head.h>	/* block_sync_page() */
 
 #include <asm/pgtable.h>
 
@@ -124,7 +122,7 @@ void delete_from_swap_cache(struct page 
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
-	BUG_ON(page_has_buffers(page));
+	BUG_ON(PagePrivate(page));
   
 	entry.val = page->index;
 
@@ -192,7 +190,7 @@ int move_from_swap_cache(struct page *pa
 
 	BUG_ON(!PageLocked(page));
 	BUG_ON(PageWriteback(page));
-	BUG_ON(page_has_buffers(page));
+	BUG_ON(PagePrivate(page));
 
 	write_lock(&swapper_space.page_lock);
 	write_lock(&mapping->page_lock);
--- 2.5.21/mm/swapfile.c~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/mm/swapfile.c	Sat Jun 15 17:15:03 2002
@@ -37,6 +37,11 @@ struct swap_info_struct swap_info[MAX_SW
 
 #define SWAPFILE_CLUSTER 256
 
+/*
+ * Switch to a new device after this many pages
+ */
+#define SWAP_STRIPE_PAGES	((1024*1024)/PAGE_SIZE)
+
 static inline int scan_swap_map(struct swap_info_struct *si)
 {
 	unsigned long offset;
@@ -47,7 +52,8 @@ static inline int scan_swap_map(struct s
 	 * first-free allocation, starting a new cluster.  This
 	 * prevents us from scattering swap pages all over the entire
 	 * swap partition, so that we reduce overall disk seek times
-	 * between swap pages.  -- sct */
+	 * between swap pages.  -- sct
+	 */
 	if (si->cluster_nr) {
 		while (si->cluster_next <= si->highest_bit) {
 			offset = si->cluster_next++;
@@ -59,29 +65,35 @@ static inline int scan_swap_map(struct s
 	}
 	si->cluster_nr = SWAPFILE_CLUSTER;
 
-	/* try to find an empty (even not aligned) cluster. */
+	/*
+	 * Try to find an empty (even not aligned) cluster
+	 */
 	offset = si->lowest_bit;
- check_next_cluster:
-	if (offset+SWAPFILE_CLUSTER-1 <= si->highest_bit)
-	{
+
+check_next_cluster:
+	if (offset + SWAPFILE_CLUSTER - 1 <= si->highest_bit) {
 		int nr;
-		for (nr = offset; nr < offset+SWAPFILE_CLUSTER; nr++)
-			if (si->swap_map[nr])
-			{
-				offset = nr+1;
+
+		for (nr = offset; nr < offset + SWAPFILE_CLUSTER; nr++) {
+			if (si->swap_map[nr]) {
+				offset = nr + 1;
 				goto check_next_cluster;
 			}
-		/* We found a completly empty cluster, so start
-		 * using it.
+		}
+
+		/*
+		 * We found a completly empty cluster, so start using it.
 		 */
 		goto got_page;
 	}
-	/* No luck, so now go finegrined as usual. -Andrea */
-	for (offset = si->lowest_bit; offset <= si->highest_bit ; offset++) {
+	/*
+	 * No luck, so now go finegrained as usual. -Andrea
+	 */
+	for (offset = si->lowest_bit; offset <= si->highest_bit; offset++) {
 		if (si->swap_map[offset])
 			continue;
 		si->lowest_bit = offset+1;
-	got_page:
+got_page:
 		if (offset == si->lowest_bit)
 			si->lowest_bit++;
 		if (offset == si->highest_bit)
@@ -92,7 +104,7 @@ static inline int scan_swap_map(struct s
 		}
 		si->swap_map[offset] = 1;
 		nr_swap_pages--;
-		si->cluster_next = offset+1;
+		si->cluster_next = offset + 1;
 		return offset;
 	}
 	si->lowest_bit = si->max;
@@ -100,9 +112,11 @@ static inline int scan_swap_map(struct s
 	return 0;
 }
 
+int akpm;
+
 swp_entry_t get_swap_page(void)
 {
-	struct swap_info_struct * p;
+	struct swap_info_struct *p;
 	unsigned long offset;
 	swp_entry_t entry;
 	int type, wrapped = 0;
@@ -122,11 +136,27 @@ swp_entry_t get_swap_page(void)
 			offset = scan_swap_map(p);
 			swap_device_unlock(p);
 			if (offset) {
-				entry = swp_entry(type,offset);
+				static int stripe;
+
+				entry = swp_entry(type, offset);
+
+				/*
+				 * Keep using the same device for a decent
+				 * number of pages so that we can build nice
+				 * big BIOs against it.
+				 */
+				if (stripe++ < SWAP_STRIPE_PAGES)
+					goto out;
+				stripe = 0;
+
+				/*
+				 * Select the next swapdevice.  Stripe across
+				 * devices if the priorities are equal.
+				 */
 				type = swap_info[type].next;
 				if (type < 0 ||
 					p->prio != swap_info[type].prio) {
-						swap_list.next = swap_list.head;
+					swap_list.next = swap_list.head;
 				} else {
 					swap_list.next = type;
 				}
@@ -139,12 +169,15 @@ swp_entry_t get_swap_page(void)
 				type = swap_list.head;
 				wrapped = 1;
 			}
-		} else
+		} else {
 			if (type < 0)
 				goto out;	/* out of swap space */
+		}
 	}
 out:
 	swap_list_unlock();
+	if (akpm)
+		printk("%d:%lu\n", swp_type(entry), swp_offset(entry));
 	return entry;
 }
 
--- 2.5.21/mm/memory.c~swap-mpage-write	Sat Jun 15 17:15:02 2002
+++ 2.5.21-akpm/mm/memory.c	Sat Jun 15 17:15:03 2002
@@ -1112,32 +1112,6 @@ out:
 	return 0;
 }
 
-/* 
- * Primitive swap readahead code. We simply read an aligned block of
- * (1 << page_cluster) entries in the swap area. This method is chosen
- * because it doesn't cost us any seek time.  We also make sure to queue
- * the 'original' request together with the readahead ones...  
- */
-void swapin_readahead(swp_entry_t entry)
-{
-	int i, num;
-	struct page *new_page;
-	unsigned long offset;
-
-	/*
-	 * Get the number of handles we should do readahead io to.
-	 */
-	num = valid_swaphandles(entry, &offset);
-	for (i = 0; i < num; offset++, i++) {
-		/* Ok, do the async read-ahead now */
-		new_page = read_swap_cache_async(swp_entry(swp_type(entry), offset));
-		if (!new_page)
-			break;
-		page_cache_release(new_page);
-	}
-	return;
-}
-
 /*
  * We hold the mm semaphore and the page_table_lock on entry and
  * should release the pagetable lock on exit..

-

next prev parent reply	other threads:[~2002-06-17  7:11 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-06-17  6:52 [patch 10/19] direct-to-BIO I/O for swapcache pages Andrew Morton
2002-06-17  7:13 ` Andrew Morton [this message]
2002-06-17 16:17 ` Andreas Dilger
2002-06-17 18:42   ` Andrew Morton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D0D8C1B.D14F5171@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.