From: Wu Fengguang <wfg@mail.ustc.edu.cn>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@osdl.org>, Wu Fengguang <wfg@mail.ustc.edu.cn>
Subject: [PATCH 09/16] readahead: read-around method for mmap file
Date: Sat, 03 Dec 2005 15:14:53 +0800 [thread overview]
Message-ID: <20051203071815.148667000@localhost.localdomain> (raw)
In-Reply-To: 20051203071444.260068000@localhost.localdomain
[-- Attachment #1: readahead-method-around.patch --]
[-- Type: text/plain, Size: 5625 bytes --]
Move the readaround logic out of filemap_nopage(), and make it a little
smarter and more tunable.
The original logic is quite aggressive, so relax readahead_hit_rate for mmap
to make the new implementation comparable to the old behavior. It should be
larger than for normal files anyway: programs can jump back and forth
frequently, so there is less chance to accumulate a big cache_hit_rate, even
though the chance of them coming back to read the missed pages is large.
Previously, pgmajfault was incremented on every readahead invocation and on
other possible I/O waits. Change it to simply be incremented on every I/O
wait, since a readahead normally implies an I/O wait.
Signed-off-by: Wu Fengguang <wfg@mail.ustc.edu.cn>
---
include/linux/fs.h | 1 +
include/linux/mm.h | 3 +++
mm/filemap.c | 42 +++++++-----------------------------------
mm/readahead.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 61 insertions(+), 35 deletions(-)
--- linux.orig/include/linux/mm.h
+++ linux/include/linux/mm.h
@@ -1014,6 +1014,9 @@ page_cache_readahead_adaptive(struct add
struct page *prev_page, struct page *page,
pgoff_t first_index,
pgoff_t index, pgoff_t last_index);
+unsigned long
+page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
+ struct file *filp, pgoff_t index);
void fastcall ra_access(struct file_ra_state *ra, struct page *page);
#ifdef CONFIG_DEBUG_FS
--- linux.orig/mm/filemap.c
+++ linux/mm/filemap.c
@@ -1267,8 +1267,9 @@ struct page *filemap_nopage(struct vm_ar
struct inode *inode = mapping->host;
struct page *page;
unsigned long size, pgoff;
- int did_readaround = 0, majmin = VM_FAULT_MINOR;
+ int majmin = VM_FAULT_MINOR;
+ ra->flags |= RA_FLAG_MMAP;
pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
retry_all:
@@ -1308,47 +1309,19 @@ retry_find:
}
}
if (!page) {
- unsigned long ra_pages;
-
if (VM_SequentialReadHint(area)) {
if (!prefer_adaptive_readahead())
handle_ra_miss(mapping, ra, pgoff);
goto no_cached_page;
}
- ra->mmap_miss++;
- /*
- * Do we miss much more than hit in this file? If so,
- * stop bothering with read-ahead. It will only hurt.
- */
- if (ra->mmap_miss > ra->mmap_hit + MMAP_LOTSAMISS)
- goto no_cached_page;
+ page_cache_readaround(mapping, ra, file, pgoff);
- /*
- * To keep the pgmajfault counter straight, we need to
- * check did_readaround, as this is an inner loop.
- */
- if (!did_readaround) {
- majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
- }
- did_readaround = 1;
- ra_pages = max_sane_readahead(file->f_ra.ra_pages);
- if (ra_pages) {
- pgoff_t start = 0;
-
- if (pgoff > ra_pages / 2)
- start = pgoff - ra_pages / 2;
- do_page_cache_readahead(mapping, file, start, ra_pages);
- }
page = find_get_page(mapping, pgoff);
if (!page)
goto no_cached_page;
}
- if (!did_readaround)
- ra->mmap_hit++;
-
ra_access(ra, page);
if (READAHEAD_DEBUG_LEVEL(6))
printk(KERN_DEBUG "read-mmap(ino=%lu, idx=%lu, hint=%s, io=%s)\n",
@@ -1371,7 +1344,7 @@ success:
mark_page_accessed(page);
if (type)
*type = majmin;
- if (prefer_adaptive_readahead())
+ if (prefer_adaptive_readahead() || !VM_SequentialReadHint(area))
ra->prev_page = page->index;
return page;
@@ -1409,10 +1382,9 @@ no_cached_page:
return NULL;
page_not_uptodate:
- if (!did_readaround) {
- majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
- }
+ majmin = VM_FAULT_MAJOR;
+ inc_page_state(pgmajfault);
+
lock_page(page);
/* Did it get unhashed while we waited for it? */
--- linux.orig/mm/readahead.c
+++ linux/mm/readahead.c
@@ -1517,6 +1517,56 @@ try_context_based_readahead(struct addre
/*
+ * Read-around for mmaped file.
+ */
+unsigned long
+page_cache_readaround(struct address_space *mapping, struct file_ra_state *ra,
+ struct file *filp, pgoff_t index)
+{
+ unsigned long ra_index;
+ unsigned long ra_size = 0;
+ unsigned long hit_rate = 8 + readahead_hit_rate;
+
+ if (ra->cache_hit * readahead_hit_rate > ra->age)
+ ra_size = ra->ra_pages;
+ else if (ra->cache_hit * hit_rate >= ra->age)
+ ra_size = ra->ra_pages / 4;
+ else if ((unsigned)(ra->prev_page - index) <= hit_rate)
+ ra_size = 4 * (ra->prev_page - index);
+ else if ((unsigned)(index - ra->prev_page) <= hit_rate)
+ ra_size = 4 * (index - ra->prev_page);
+ else {
+ read_lock_irq(&mapping->tree_lock);
+ if (radix_tree_lookup_node(&mapping->page_tree, index, 1))
+ ra_size = RADIX_TREE_MAP_SIZE;
+ read_unlock_irq(&mapping->tree_lock);
+ }
+
+ if (!ra_size)
+ return 0;
+
+ ra_size = max_sane_readahead(ra_size);
+
+ if (index > ra_size / 2) {
+ ra_index = index - ra_size / 2;
+ if (!(ra_size & RADIX_TREE_MAP_MASK))
+ ra_index = (ra_index + RADIX_TREE_MAP_SIZE / 2) &
+ ~RADIX_TREE_MAP_MASK;
+ } else
+ ra_index = 0;
+
+ set_ra_class(ra, RA_CLASS_AROUND);
+ ra->cache_hit = 0;
+ ra->ra_index = ra_index;
+ ra->la_index = ra_index;
+ ra->readahead_index = ra_index + ra_size;
+ ra->lookahead_index = ra_index + ra_size;
+
+ ra->age = ra_dispatch(ra, mapping, filp);
+ return ra->age;
+}
+
+/*
* ra_size is mainly determined by:
* 1. sequential-start: min(MIN_RA_PAGES + (pages>>14), KB(128))
* 2. sequential-max: min(ra->ra_pages, 0xFFFF)
--- linux.orig/include/linux/fs.h
+++ linux/include/linux/fs.h
@@ -622,6 +622,7 @@ struct file_ra_state {
};
#define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */
#define RA_FLAG_INCACHE 0x02 /* file is already in cache */
+#define RA_FLAG_MMAP (1UL<<31) /* mmaped page access */
struct file {
/*
--
next prev parent reply other threads:[~2005-12-03 7:12 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-12-03 7:14 [PATCH 00/16] Adaptive read-ahead V9 Wu Fengguang
2005-12-03 7:14 ` [PATCH 01/16] mm: delayed page activation Wu Fengguang
2005-12-04 12:11 ` Nikita Danilov
2005-12-04 13:48 ` Wu Fengguang
2005-12-04 15:03 ` Nikita Danilov
2005-12-04 15:37 ` Help!Unable to handle kernel NULL pointer tony
2005-12-04 19:10 ` [PATCH 01/16] mm: delayed page activation Peter Zijlstra
2005-12-05 1:48 ` Wu Fengguang
2005-12-06 17:55 ` Nikita Danilov
2005-12-07 1:42 ` Wu Fengguang
2005-12-07 9:46 ` Andrew Morton
2005-12-07 10:36 ` Wu Fengguang
2005-12-07 12:44 ` Nikita Danilov
2005-12-07 13:53 ` Wu Fengguang
2005-12-03 7:14 ` [PATCH 02/16] radixtree: sync with mainline Wu Fengguang
2005-12-04 23:57 ` Andrew Morton
2005-12-05 1:43 ` Wu Fengguang
2005-12-05 4:05 ` Wu Fengguang
2005-12-05 17:22 ` Christoph Lameter
2005-12-05 10:44 ` Wu Fengguang
2005-12-05 17:24 ` Christoph Lameter
2005-12-06 2:23 ` Wu Fengguang
2005-12-03 7:14 ` [PATCH 03/16] radixtree: look-aside cache Wu Fengguang
2005-12-03 7:14 ` [PATCH 04/16] readahead: some preparation Wu Fengguang
2005-12-03 7:14 ` [PATCH 05/16] readahead: call scheme Wu Fengguang
2005-12-03 7:14 ` [PATCH 06/16] readahead: parameters Wu Fengguang
2005-12-03 7:14 ` [PATCH 07/16] readahead: state based method Wu Fengguang
2005-12-03 7:14 ` [PATCH 08/16] readahead: context " Wu Fengguang
2005-12-03 7:14 ` Wu Fengguang [this message]
2005-12-03 7:14 ` [PATCH 10/16] readahead: other methods Wu Fengguang
2005-12-03 7:14 ` [PATCH 11/16] readahead: detect and rescue live pages Wu Fengguang
2005-12-03 7:14 ` [PATCH 12/16] readahead: events accounting Wu Fengguang
2005-12-03 7:14 ` [PATCH 13/16] readahead: laptop mode support Wu Fengguang
2005-12-03 7:14 ` [PATCH 14/16] readahead: disable look-ahead for loopback file Wu Fengguang
2005-12-03 7:14 ` [PATCH 15/16] readahead: nfsd support Wu Fengguang
2005-12-03 7:15 ` [PATCH 16/16] io: prevent too much latency in the read-ahead code Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20051203071815.148667000@localhost.localdomain \
--to=wfg@mail.ustc.edu.cn \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox