All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Piggin <nickpiggin@yahoo.com.au>
To: Jens Axboe <axboe@suse.de>
Cc: David Chinner <dgc@sgi.com>, Christoph Lameter <clameter@sgi.com>,
	Andrew Morton <akpm@osdl.org>,
	linux-kernel@vger.kernel.org, npiggin@suse.de,
	linux-mm@kvack.org
Subject: Re: Lockless page cache test results
Date: Sun, 30 Apr 2006 21:44:53 +1000	[thread overview]
Message-ID: <4454A335.7070600@yahoo.com.au> (raw)
In-Reply-To: <20060430113905.GF23137@suse.de>

[-- Attachment #1: Type: text/plain, Size: 257 bytes --]

Jens Axboe wrote:

> You killed a lock too many there. I'm sure it'd help scalability,
> though :-)

Ssshh! That was my secret plan :)

Good catch though, thanks. I'll attach a new patch in case David
has a chance to try it out.

-- 
SUSE Labs, Novell Inc.

[-- Attachment #2: mm-batch-ra-pagecache-add.patch --]
[-- Type: text/plain, Size: 6960 bytes --]

Index: linux-2.6/fs/mpage.c
===================================================================
--- linux-2.6.orig/fs/mpage.c	2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/fs/mpage.c	2006-04-30 21:42:15.000000000 +1000
@@ -26,6 +26,7 @@
 #include <linux/writeback.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/swap.h>
 
 /*
  * I/O completion handler for multipage BIOs.
@@ -389,31 +390,57 @@ mpage_readpages(struct address_space *ma
 	struct bio *bio = NULL;
 	unsigned page_idx;
 	sector_t last_block_in_bio = 0;
-	struct pagevec lru_pvec;
+	struct pagevec pvec;
 	struct buffer_head map_bh;
 	unsigned long first_logical_block = 0;
 
 	clear_buffer_mapped(&map_bh);
-	pagevec_init(&lru_pvec, 0);
+	pagevec_init(&pvec, 0);
 	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
 		struct page *page = list_entry(pages->prev, struct page, lru);
 
 		prefetchw(&page->flags);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
-					page->index, GFP_KERNEL)) {
-			bio = do_mpage_readpage(bio, page,
-					nr_pages - page_idx,
-					&last_block_in_bio, &map_bh,
-					&first_logical_block,
-					get_block);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else {
-			page_cache_release(page);
+
+		if (!pagevec_add(&pvec, page) || page_idx == nr_pages-1) {
+			int i = 0, in_cache;
+
+			if (radix_tree_preload(GFP_KERNEL))
+				goto pagevec_error;
+
+			write_lock_irq(&mapping->tree_lock);
+			for (; i < pagevec_count(&pvec); i++) {
+				struct page *page = pvec.pages[i];
+				unsigned long offset = page->index;
+
+				if (__add_to_page_cache(page, mapping, offset))
+					break; /* error */
+			}
+			write_unlock_irq(&mapping->tree_lock);
+			radix_tree_preload_end();
+
+			in_cache = i;
+			for (i = 0; i < in_cache; i++) {
+				struct page *page = pvec.pages[i];
+
+				bio = do_mpage_readpage(bio, page,
+						nr_pages - page_idx,
+						&last_block_in_bio, &map_bh,
+						&first_logical_block,
+						get_block);
+				lru_cache_add(page);
+			}
+
+pagevec_error:
+			for (; i < pagevec_count(&pvec); i++) {
+				struct page *page = pvec.pages[i];
+				page_cache_release(page);
+			}
+
+			pagevec_reinit(&pvec);
 		}
 	}
-	pagevec_lru_add(&lru_pvec);
+
 	BUG_ON(!list_empty(pages));
 	if (bio)
 		mpage_bio_submit(READ, bio);
Index: linux-2.6/mm/filemap.c
===================================================================
--- linux-2.6.orig/mm/filemap.c	2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/mm/filemap.c	2006-04-30 21:42:42.000000000 +1000
@@ -394,6 +394,21 @@ int filemap_write_and_wait_range(struct 
 	return err;
 }
 
+int __add_to_page_cache(struct page *page, struct address_space *mapping,
+		pgoff_t offset)
+{
+	int error = radix_tree_insert(&mapping->page_tree, offset, page);
+	if (!error) {
+		page_cache_get(page);
+		SetPageLocked(page);
+		page->mapping = mapping;
+		page->index = offset;
+		mapping->nrpages++;
+		pagecache_acct(1);
+	}
+	return error;
+}
+
 /*
  * This function is used to add newly allocated pagecache pages:
  * the page is new, so we can just run SetPageLocked() against it.
@@ -408,18 +423,11 @@ int add_to_page_cache(struct page *page,
 
 	if (error == 0) {
 		write_lock_irq(&mapping->tree_lock);
-		error = radix_tree_insert(&mapping->page_tree, offset, page);
-		if (!error) {
-			page_cache_get(page);
-			SetPageLocked(page);
-			page->mapping = mapping;
-			page->index = offset;
-			mapping->nrpages++;
-			pagecache_acct(1);
-		}
+		error = __add_to_page_cache(page, mapping, offset);
 		write_unlock_irq(&mapping->tree_lock);
 		radix_tree_preload_end();
 	}
+
 	return error;
 }
 
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c	2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/mm/readahead.c	2006-04-30 21:42:15.000000000 +1000
@@ -14,6 +14,7 @@
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
 #include <linux/pagevec.h>
+#include <linux/swap.h>
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -164,37 +165,60 @@ int read_cache_pages(struct address_spac
 
 EXPORT_SYMBOL(read_cache_pages);
 
-static int read_pages(struct address_space *mapping, struct file *filp,
+static void __pagevec_read_pages(struct file *filp,
+		struct address_space *mapping, struct pagevec *pvec)
+{
+	int i = 0, in_cache;
+
+	if (radix_tree_preload(GFP_KERNEL))
+		goto out_error;
+
+	write_lock_irq(&mapping->tree_lock);
+	for (; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		unsigned long offset = page->index;
+
+		if (__add_to_page_cache(page, mapping, offset))
+			break; /* error */
+	}
+	write_unlock_irq(&mapping->tree_lock);
+	radix_tree_preload_end();
+
+	in_cache = i;
+	for (i = 0; i < in_cache; i++) {
+		struct page *page = pvec->pages[i];
+		mapping->a_ops->readpage(filp, page);
+		lru_cache_add(page);
+	}
+
+out_error:
+	for (; i < pagevec_count(pvec); i++) {
+		struct page *page = pvec->pages[i];
+		page_cache_release(page);
+	}
+
+	pagevec_reinit(pvec);
+}
+
+static void read_pages(struct address_space *mapping, struct file *filp,
 		struct list_head *pages, unsigned nr_pages)
 {
-	unsigned page_idx;
-	struct pagevec lru_pvec;
-	int ret;
+	unsigned i;
+	struct pagevec pvec;
 
 	if (mapping->a_ops->readpages) {
-		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
-		goto out;
+		mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
+		return;
 	}
 
-	pagevec_init(&lru_pvec, 0);
-	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+	pagevec_init(&pvec, 0);
+	for (i = 0; i < nr_pages; i++) {
 		struct page *page = list_to_page(pages);
 		list_del(&page->lru);
-		if (!add_to_page_cache(page, mapping,
-					page->index, GFP_KERNEL)) {
-			ret = mapping->a_ops->readpage(filp, page);
-			if (ret != AOP_TRUNCATED_PAGE) {
-				if (!pagevec_add(&lru_pvec, page))
-					__pagevec_lru_add(&lru_pvec);
-				continue;
-			} /* else fall through to release */
-		}
-		page_cache_release(page);
+
+		if (!pagevec_add(&pvec, page) || i == nr_pages-1)
+			__pagevec_read_pages(filp, mapping, &pvec);
 	}
-	pagevec_lru_add(&lru_pvec);
-	ret = 0;
-out:
-	return ret;
 }
 
 /*
Index: linux-2.6/include/linux/pagemap.h
===================================================================
--- linux-2.6.orig/include/linux/pagemap.h	2006-04-30 19:36:18.000000000 +1000
+++ linux-2.6/include/linux/pagemap.h	2006-04-30 21:42:16.000000000 +1000
@@ -97,6 +97,8 @@ extern struct page * read_cache_page(str
 extern int read_cache_pages(struct address_space *mapping,
 		struct list_head *pages, filler_t *filler, void *data);
 
+int __add_to_page_cache(struct page *page, struct address_space *mapping,
+				unsigned long index);
 int add_to_page_cache(struct page *page, struct address_space *mapping,
 				unsigned long index, gfp_t gfp_mask);
 int add_to_page_cache_lru(struct page *page, struct address_space *mapping,

  reply	other threads:[~2006-04-30 13:38 UTC|newest]

Thread overview: 99+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-26 13:53 Lockless page cache test results Jens Axboe
2006-04-26 14:43 ` Nick Piggin
2006-04-26 14:43   ` Nick Piggin
2006-04-26 19:46   ` Jens Axboe
2006-04-26 19:46     ` Jens Axboe
2006-04-27  5:39     ` Chen, Kenneth W
2006-04-27  5:39       ` Chen, Kenneth W
2006-04-27  6:07       ` Nick Piggin
2006-04-27  6:07         ` Nick Piggin
2006-04-27  6:15       ` Andi Kleen
2006-04-27  6:15         ` Andi Kleen
2006-04-27  7:51         ` Chen, Kenneth W
2006-04-27  7:51           ` Chen, Kenneth W
2006-04-26 16:55 ` Andrew Morton
2006-04-26 16:55   ` Andrew Morton
2006-04-26 17:42   ` Jens Axboe
2006-04-26 17:42     ` Jens Axboe
2006-04-26 18:10     ` Andrew Morton
2006-04-26 18:10       ` Andrew Morton
2006-04-26 18:23       ` Jens Axboe
2006-04-26 18:23         ` Jens Axboe
2006-04-26 18:46         ` Andrew Morton
2006-04-26 18:46           ` Andrew Morton
2006-04-26 19:21           ` Jens Axboe
2006-04-26 19:21             ` Jens Axboe
2006-04-27  5:58           ` Nick Piggin
2006-04-27  5:58             ` Nick Piggin
2006-04-26 18:34       ` Christoph Lameter
2006-04-26 18:34         ` Christoph Lameter
2006-04-26 18:47         ` Andrew Morton
2006-04-26 18:47           ` Andrew Morton
2006-04-26 18:48           ` Christoph Lameter
2006-04-26 18:48             ` Christoph Lameter
2006-04-26 18:49           ` Jens Axboe
2006-04-26 18:49             ` Jens Axboe
2006-04-26 20:31             ` Christoph Lameter
2006-04-26 20:31               ` Christoph Lameter
2006-04-28 14:01               ` David Chinner
2006-04-28 14:01                 ` David Chinner
2006-04-28 14:10                 ` David Chinner
2006-04-28 14:10                   ` David Chinner
2006-04-30  9:49                 ` Nick Piggin
2006-04-30 11:20                   ` Nick Piggin
2006-04-30 11:20                     ` Nick Piggin
2006-04-30 11:39                   ` Jens Axboe
2006-04-30 11:39                     ` Jens Axboe
2006-04-30 11:44                     ` Nick Piggin [this message]
2006-04-26 18:58       ` Christoph Hellwig
2006-04-26 18:58         ` Christoph Hellwig
2006-04-26 19:02         ` Jens Axboe
2006-04-26 19:02           ` Jens Axboe
2006-04-26 19:00       ` Linus Torvalds
2006-04-26 19:00         ` Linus Torvalds
2006-04-26 19:15         ` Jens Axboe
2006-04-26 19:15           ` Jens Axboe
2006-04-26 20:12           ` Andrew Morton
2006-04-26 20:12             ` Andrew Morton
2006-04-27  7:45             ` Jens Axboe
2006-04-27  7:47               ` Jens Axboe
2006-04-27  7:47                 ` Jens Axboe
2006-04-27  7:57               ` Nick Piggin
2006-04-27  7:57                 ` Nick Piggin
2006-04-27  8:02                 ` Nick Piggin
2006-04-27  8:02                   ` Nick Piggin
2006-04-27  9:00                   ` Jens Axboe
2006-04-27  9:00                     ` Jens Axboe
2006-04-27 13:36                     ` Nick Piggin
2006-04-27 13:36                       ` Nick Piggin
2006-04-27  8:36                 ` Jens Axboe
2006-04-27  8:36                   ` Jens Axboe
2006-04-28 11:28             ` Wu Fengguang
2006-04-28 11:28               ` Wu Fengguang
2006-04-28 11:28                 ` Wu Fengguang
2006-04-27  5:49         ` Nick Piggin
2006-04-27  5:49           ` Nick Piggin
2006-04-27 15:12           ` Linus Torvalds
2006-04-27 15:12             ` Linus Torvalds
2006-04-28  4:54             ` Nick Piggin
2006-04-28  4:54               ` Nick Piggin
2006-04-28  5:34               ` Linus Torvalds
2006-04-28  5:34                 ` Linus Torvalds
2006-04-27  9:35         ` Jens Axboe
2006-04-27  5:22       ` Nick Piggin
2006-04-27  5:22         ` Nick Piggin
2006-04-26 18:57     ` Jens Axboe
2006-04-27  2:19       ` KAMEZAWA Hiroyuki
2006-04-27  2:19         ` KAMEZAWA Hiroyuki
2006-04-27  8:03         ` Jens Axboe
2006-04-27  8:03           ` Jens Axboe
2006-04-27 11:16           ` Jens Axboe
2006-04-27 11:16             ` Jens Axboe
2006-04-27 11:41             ` KAMEZAWA Hiroyuki
2006-04-27 11:41               ` KAMEZAWA Hiroyuki
2006-04-27 11:45               ` Jens Axboe
2006-04-27 11:45                 ` Jens Axboe
2006-04-28  9:10 ` Pavel Machek
2006-04-28  9:10   ` Pavel Machek
2006-04-28  9:21   ` Jens Axboe
2006-04-28  9:21     ` Jens Axboe
  -- strict thread matches above, loose matches on Subject: below --
2006-04-28 16:58 Al Boldi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4454A335.7070600@yahoo.com.au \
    --to=nickpiggin@yahoo.com.au \
    --cc=akpm@osdl.org \
    --cc=axboe@suse.de \
    --cc=clameter@sgi.com \
    --cc=dgc@sgi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=npiggin@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.