From: Wu Fengguang <wfg@mail.ustc.edu.cn>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Wu Fengguang <wfg@mail.ustc.edu.cn>
Subject: [PATCH 26/33] readahead: call scheme
Date: Wed, 24 May 2006 19:13:12 +0800 [thread overview]
Message-ID: <348469548.73326@ustc.edu.cn> (raw)
Message-ID: <20060524111910.207894375@localhost.localdomain> (raw)
In-Reply-To: 20060524111246.420010595@localhost.localdomain
[-- Attachment #1: readahead-call-scheme.patch --]
[-- Type: text/plain, Size: 9456 bytes --]
The read-ahead logic is called when a read hits
- a look-ahead mark;
- a non-present page.
ra.prev_page should be properly set up (to the previous page index) on entry,
and readahead_cache_hit() should be called on every page reference to maintain the cache_hits counter.
This call scheme achieves the following goals:
- makes all stateful/stateless methods happy;
- eliminates the cache hit problem naturally;
- lives in harmony with application managed read-aheads via
fadvise/madvise.
Signed-off-by: Wu Fengguang <wfg@mail.ustc.edu.cn>
---
include/linux/mm.h | 6 ++
mm/filemap.c | 51 ++++++++++++++++-
mm/readahead.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 205 insertions(+), 4 deletions(-)
--- linux-2.6.17-rc4-mm3.orig/include/linux/mm.h
+++ linux-2.6.17-rc4-mm3/include/linux/mm.h
@@ -1033,6 +1033,12 @@ void handle_ra_miss(struct address_space
struct file_ra_state *ra, pgoff_t offset);
unsigned long max_sane_readahead(unsigned long nr);
void fastcall readahead_close(struct file *file);
+unsigned long
+page_cache_readahead_adaptive(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *filp,
+ struct page *prev_page, struct page *page,
+ pgoff_t first_index, pgoff_t index, pgoff_t last_index);
+void fastcall readahead_cache_hit(struct file_ra_state *ra, struct page *page);
#ifdef CONFIG_ADAPTIVE_READAHEAD
extern int readahead_ratio;
--- linux-2.6.17-rc4-mm3.orig/mm/filemap.c
+++ linux-2.6.17-rc4-mm3/mm/filemap.c
@@ -847,14 +847,32 @@ void do_generic_mapping_read(struct addr
nr = nr - offset;
cond_resched();
- if (index == next_index)
+
+ if (!prefer_adaptive_readahead() && index == next_index)
next_index = page_cache_readahead(mapping, &ra, filp,
index, last_index - index);
find_page:
page = find_get_page(mapping, index);
+ if (prefer_adaptive_readahead()) {
+ if (unlikely(page == NULL)) {
+ ra.prev_page = prev_index;
+ page_cache_readahead_adaptive(mapping, &ra,
+ filp, prev_page, NULL,
+ *ppos >> PAGE_CACHE_SHIFT,
+ index, last_index);
+ page = find_get_page(mapping, index);
+ } else if (PageReadahead(page)) {
+ ra.prev_page = prev_index;
+ page_cache_readahead_adaptive(mapping, &ra,
+ filp, prev_page, page,
+ *ppos >> PAGE_CACHE_SHIFT,
+ index, last_index);
+ }
+ }
if (unlikely(page == NULL)) {
- handle_ra_miss(mapping, &ra, index);
+ if (!prefer_adaptive_readahead())
+ handle_ra_miss(mapping, &ra, index);
goto no_cached_page;
}
@@ -862,6 +880,9 @@ find_page:
page_cache_release(prev_page);
prev_page = page;
+ if (prefer_adaptive_readahead())
+ readahead_cache_hit(&ra, page);
+
if (!PageUptodate(page))
goto page_not_up_to_date;
page_ok:
@@ -1005,6 +1026,8 @@ no_cached_page:
out:
*_ra = ra;
+ if (prefer_adaptive_readahead())
+ _ra->prev_page = prev_index;
*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
if (cached_page)
@@ -1290,6 +1313,7 @@ struct page *filemap_nopage(struct vm_ar
unsigned long size, pgoff;
int did_readaround = 0, majmin = VM_FAULT_MINOR;
+ ra->flags |= RA_FLAG_MMAP;
pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
retry_all:
@@ -1307,19 +1331,33 @@ retry_all:
*
* For sequential accesses, we use the generic readahead logic.
*/
- if (VM_SequentialReadHint(area))
+ if (!prefer_adaptive_readahead() && VM_SequentialReadHint(area))
page_cache_readahead(mapping, ra, file, pgoff, 1);
+
/*
* Do we have something in the page cache already?
*/
retry_find:
page = find_get_page(mapping, pgoff);
+ if (prefer_adaptive_readahead() && VM_SequentialReadHint(area)) {
+ if (!page) {
+ page_cache_readahead_adaptive(mapping, ra,
+ file, NULL, NULL,
+ pgoff, pgoff, pgoff + 1);
+ page = find_get_page(mapping, pgoff);
+ } else if (PageReadahead(page)) {
+ page_cache_readahead_adaptive(mapping, ra,
+ file, NULL, page,
+ pgoff, pgoff, pgoff + 1);
+ }
+ }
if (!page) {
unsigned long ra_pages;
if (VM_SequentialReadHint(area)) {
- handle_ra_miss(mapping, ra, pgoff);
+ if (!prefer_adaptive_readahead())
+ handle_ra_miss(mapping, ra, pgoff);
goto no_cached_page;
}
ra->mmap_miss++;
@@ -1356,6 +1394,9 @@ retry_find:
if (!did_readaround)
ra->mmap_hit++;
+ if (prefer_adaptive_readahead())
+ readahead_cache_hit(ra, page);
+
/*
* Ok, found a page in the page cache, now we need to check
* that it's up-to-date.
@@ -1370,6 +1411,8 @@ success:
mark_page_accessed(page);
if (type)
*type = majmin;
+ if (prefer_adaptive_readahead())
+ ra->prev_page = page->index;
return page;
outside_data_content:
--- linux-2.6.17-rc4-mm3.orig/mm/readahead.c
+++ linux-2.6.17-rc4-mm3/mm/readahead.c
@@ -1717,6 +1717,158 @@ static inline void get_readahead_bounds(
PAGES_KB(128)), *ra_max / 2);
}
+/**
+ * page_cache_readahead_adaptive - thrashing safe adaptive read-ahead
+ * @mapping, @ra, @filp: the same as page_cache_readahead()
+ * @prev_page: the page at @index-1, may be NULL to let the function find it
+ * @page: the page at @index, or NULL if it is not present
+ * @begin_index, @index, @end_index: offsets into @mapping;
+ * [@begin_index, @end_index) is the read the caller is performing and
+ * @index indicates the page to be read now
+ *
+ * page_cache_readahead_adaptive() is the entry point of the adaptive
+ * read-ahead logic. It tries a set of methods in turn to determine the
+ * appropriate read-ahead action and submits the read-ahead I/O.
+ *
+ * The caller is expected to set ra->prev_page to the index of the previously
+ * accessed page, and to call this function in two situations:
+ * 1. @page == NULL
+ * A cache miss happened; some pages have to be read in.
+ * 2. @page != NULL && PageReadahead(@page)
+ * A look-ahead mark was encountered. The mark is set by a previous
+ * read-ahead invocation to give this function a chance to check up and
+ * start the next read-ahead in advance.
+ */
+unsigned long
+page_cache_readahead_adaptive(struct address_space *mapping,
+ struct file_ra_state *ra, struct file *filp,
+ struct page *prev_page, struct page *page,
+ pgoff_t begin_index, pgoff_t index, pgoff_t end_index)
+{
+ unsigned long size;
+ unsigned long ra_min;
+ unsigned long ra_max;
+ int ret;
+
+ might_sleep();
+
+ if (page) {
+ if(!TestClearPageReadahead(page))
+ return 0;
+ if (bdi_read_congested(mapping->backing_dev_info)) {
+ ra_account(ra, RA_EVENT_IO_CONGESTION,
+ end_index - index);
+ return 0;
+ }
+ }
+
+ if (page)
+ ra_account(ra, RA_EVENT_LOOKAHEAD_HIT,
+ ra->readahead_index - ra->lookahead_index);
+ else if (index) /* a miss at index 0 is not accounted as a cache miss */
+ ra_account(ra, RA_EVENT_CACHE_MISS, end_index - begin_index);
+
+ size = end_index - index; /* pages from @index to the end of the read */
+ get_readahead_bounds(ra, &ra_min, &ra_max);
+
+ /* Read-ahead disabled? Then skip all methods and go straight to the read. */
+ if (unlikely(!ra_max || !readahead_ratio)) {
+ size = max_sane_readahead(size);
+ goto readit;
+ }
+
+ /*
+ * Start of file.
+ */
+ if (index == 0)
+ return initial_readahead(mapping, filp, ra, size);
+
+ /*
+ * State based sequential read-ahead.
+ */
+ if (!debug_option(disable_stateful_method) &&
+ index == ra->lookahead_index && ra_cache_hit_ok(ra))
+ return state_based_readahead(mapping, filp, ra, page,
+ index, size, ra_max);
+
+ /*
+ * Recover from possible thrashing.
+ */
+ if (!page && index == ra->prev_page + 1 && ra_has_index(ra, index))
+ return thrashing_recovery_readahead(mapping, filp, ra,
+ index, ra_max);
+
+ /*
+ * Backward read-ahead.
+ */
+ if (!page && begin_index == index &&
+ try_read_backward(ra, index, size, ra_max))
+ return ra_dispatch(ra, mapping, filp);
+
+ /*
+ * Context based sequential read-ahead.
+ */
+ ret = try_context_based_readahead(mapping, ra, prev_page, page,
+ index, ra_min, ra_max);
+ if (ret > 0)
+ return ra_dispatch(ra, mapping, filp);
+ if (ret < 0) /* negative result: return without trying the remaining methods */
+ return 0;
+
+ /* No action on look ahead time? */
+ if (page) {
+ ra_account(ra, RA_EVENT_LOOKAHEAD_NOACTION,
+ ra->readahead_index - index);
+ return 0;
+ }
+
+ /*
+ * Random read that follows a sequential one.
+ */
+ if (try_readahead_on_seek(ra, index, size, ra_max))
+ return ra_dispatch(ra, mapping, filp);
+
+ /*
+ * Random read.
+ */
+ if (size > ra_max)
+ size = ra_max;
+
+readit: /* also reached from the "read-ahead disabled" branch above */
+ size = __do_page_cache_readahead(mapping, filp, index, size, 0);
+
+ ra_account(ra, RA_EVENT_RANDOM_READ, size);
+ dprintk("random_read(ino=%lu, pages=%lu, index=%lu-%lu-%lu) = %lu\n",
+ mapping->host->i_ino, mapping->nrpages,
+ begin_index, index, end_index, size);
+
+ return size;
+}
+
+/**
+ * readahead_cache_hit - adaptive read-ahead feedback function
+ * @ra: file_ra_state which holds the read-ahead state
+ * @page: the page just accessed
+ *
+ * readahead_cache_hit() is the feedback route of the adaptive read-ahead
+ * logic. It must be called on every access to the read-ahead pages.
+ */
+void fastcall readahead_cache_hit(struct file_ra_state *ra, struct page *page)
+{
+ if (!PageUptodate(page)) /* accounted even if @page is not tracked by @ra */
+ ra_account(ra, RA_EVENT_IO_BLOCK, 1);
+
+ if (!ra_has_index(ra, page->index)) /* NOTE(review): presumably "outside @ra's ranges" - confirm */
+ return;
+
+ ra->cache_hits++;
+
+ if (page->index >= ra->ra_index)
+ ra_account(ra, RA_EVENT_READAHEAD_HIT, 1);
+ else /* page lies before ra->ra_index: accounted with a count of -1 */
+ ra_account(ra, RA_EVENT_READAHEAD_HIT, -1);
+}
+
/*
* When closing a normal readonly file,
* - on cache hit: increase `backing_dev_info.ra_expect_bytes' slowly;
--
next prev parent reply other threads:[~2006-05-24 11:21 UTC|newest]
Thread overview: 108+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-05-24 11:12 [PATCH 00/33] Adaptive read-ahead V12 Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 15:44 ` Andrew Morton
2006-05-25 19:26 ` Michael Stone
2006-05-25 19:40 ` David Lang
2006-05-25 22:01 ` Andrew Morton
2006-05-25 20:28 ` David Lang
2006-05-26 0:48 ` Michael Stone
2006-05-26 1:19 ` Wu Fengguang
2006-05-26 1:19 ` Wu Fengguang
2006-05-26 2:10 ` Jon Smirl
2006-05-26 3:14 ` Nick Piggin
2006-05-26 14:00 ` Andi Kleen
2006-05-26 16:25 ` Andrew Morton
2006-05-26 23:54 ` Folkert van Heusden
2006-05-27 0:00 ` Con Kolivas
2006-05-27 0:08 ` Con Kolivas
2006-05-28 22:20 ` Diego Calleja
2006-05-28 22:31 ` kernel
2006-05-29 3:04 ` Wu Fengguang
2006-05-29 3:04 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 02/33] radixtree: look-aside cache Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 03/33] radixtree: hole scanning functions Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 16:19 ` Andrew Morton
2006-05-26 7:04 ` Wu Fengguang
2006-05-26 7:04 ` Wu Fengguang
2006-05-26 11:05 ` Wu Fengguang
2006-05-26 11:05 ` Wu Fengguang
2006-05-26 16:19 ` Andrew Morton
2006-05-24 11:12 ` [PATCH 04/33] readahead: page flag PG_readahead Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 16:23 ` Andrew Morton
2006-05-26 7:06 ` Wu Fengguang
2006-05-26 7:06 ` Wu Fengguang
2006-05-24 12:27 ` Peter Zijlstra
2006-05-24 12:37 ` Wu Fengguang
2006-05-24 12:37 ` Wu Fengguang
2006-05-24 12:48 ` Peter Zijlstra
2006-05-24 11:12 ` [PATCH 05/33] readahead: refactor do_generic_mapping_read() Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 06/33] readahead: refactor __do_page_cache_readahead() Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 16:30 ` Andrew Morton
2006-05-25 22:33 ` Paul Mackerras
2006-05-25 22:40 ` Andrew Morton
2006-05-26 7:13 ` Wu Fengguang
2006-05-26 7:13 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 07/33] readahead: insert cond_resched() calls Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 08/33] readahead: common macros Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 5:56 ` Nick Piggin
2006-05-25 10:41 ` Wu Fengguang
2006-05-25 10:41 ` Wu Fengguang
2006-05-26 3:33 ` Nick Piggin
2006-05-26 6:59 ` Wu Fengguang
2006-05-26 6:59 ` Wu Fengguang
2006-05-25 13:42 ` Wu Fengguang
2006-05-25 13:42 ` Wu Fengguang
2006-05-25 14:38 ` Andrew Morton
2006-05-25 16:33 ` Andrew Morton
2006-05-24 11:12 ` [PATCH 09/33] readahead: events accounting Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 16:36 ` Andrew Morton
2006-05-26 7:09 ` Wu Fengguang
2006-05-26 7:09 ` Wu Fengguang
2006-05-27 13:20 ` Wu Fengguang
2006-05-27 13:20 ` Wu Fengguang
2006-05-29 8:19 ` Martin Peschke
2006-05-24 11:12 ` [PATCH 10/33] readahead: support functions Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 5:13 ` Nick Piggin
2006-05-25 11:13 ` Wu Fengguang
2006-05-25 11:13 ` Wu Fengguang
2006-05-25 16:48 ` Andrew Morton
2006-05-26 7:31 ` Wu Fengguang
2006-05-26 7:31 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 11/33] readahead: sysctl parameters Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-25 4:50 ` [PATCH 12/33] readahead: min/max sizes Nick Piggin
2006-05-25 12:12 ` Wu Fengguang
2006-05-25 12:12 ` Wu Fengguang
2006-05-24 11:12 ` [PATCH 13/33] readahead: state based method - aging accounting Wu Fengguang
2006-05-24 11:12 ` Wu Fengguang
2006-05-26 17:04 ` Andrew Morton
2006-05-27 6:22 ` Wu Fengguang
2006-05-27 6:22 ` Wu Fengguang
2006-05-27 7:00 ` Andrew Morton
2006-05-27 7:22 ` Wu Fengguang
2006-05-27 7:22 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 14/33] readahead: state based method - data structure Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-25 6:03 ` Nick Piggin
2006-05-25 10:43 ` Wu Fengguang
2006-05-25 10:43 ` Wu Fengguang
2006-05-26 17:05 ` Andrew Morton
2006-05-27 7:02 ` Wu Fengguang
2006-05-27 7:02 ` Wu Fengguang
2006-05-27 8:27 ` Wu Fengguang
2006-05-27 8:27 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 15/33] readahead: state based method - routines Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-26 17:15 ` Andrew Morton
2006-05-27 2:06 ` Wu Fengguang
2006-05-27 2:06 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 17/33] readahead: context based method Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-25 5:26 ` Nick Piggin
2006-05-25 8:03 ` Wu Fengguang
2006-05-25 8:03 ` Wu Fengguang
2006-05-26 17:23 ` Andrew Morton
2006-05-27 2:12 ` Wu Fengguang
2006-05-27 2:12 ` Wu Fengguang
2006-05-26 17:27 ` Andrew Morton
2006-05-27 8:04 ` Wu Fengguang
2006-05-27 8:04 ` Wu Fengguang
2006-05-24 12:37 ` Peter Zijlstra
2006-05-24 13:33 ` Wu Fengguang
2006-05-24 13:33 ` Wu Fengguang
2006-05-24 15:53 ` Peter Zijlstra
2006-05-25 1:25 ` Wu Fengguang
2006-05-25 1:25 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 18/33] readahead: initial method - guiding sizes Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 19/33] readahead: initial method - thrashing guard size Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 20/33] readahead: initial method - expected read size Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-25 5:34 ` [PATCH 22/33] readahead: initial method Nick Piggin
2006-05-25 8:59 ` Wu Fengguang
2006-05-25 8:59 ` Wu Fengguang
2006-05-26 17:29 ` [PATCH 20/33] readahead: initial method - expected read size Andrew Morton
2006-05-27 6:38 ` Wu Fengguang
2006-05-27 6:38 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 23/33] readahead: backward prefetching method Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-26 17:37 ` Nate Diller
2006-05-26 19:22 ` Nathan Scott
2006-05-28 12:30 ` Wu Fengguang
2006-05-28 12:30 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 24/33] readahead: seeking reads method Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 25/33] readahead: thrashing recovery method Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang [this message]
2006-05-24 11:13 ` [PATCH 26/33] readahead: call scheme Wu Fengguang
2006-05-24 11:13 ` [PATCH 27/33] readahead: laptop mode Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-26 17:38 ` Andrew Morton
2006-05-24 11:13 ` [PATCH 28/33] readahead: loop case Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 14:01 ` Limin Wang
2006-05-25 15:48 ` wfg
2006-05-25 15:48 ` wfg
2006-05-24 11:13 ` [PATCH 29/33] readahead: nfsd case Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 30/33] readahead: turn on by default Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 31/33] readahead: debug radix tree new functions Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 32/33] readahead: debug traces showing accessed file names Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
2006-05-24 11:13 ` [PATCH 33/33] readahead: debug traces showing read patterns Wu Fengguang
2006-05-24 11:13 ` Wu Fengguang
[not found] <20060526113906.084341801@localhost.localdomain>
2006-05-26 11:39 ` [PATCH 26/33] readahead: call scheme Wu Fengguang
2006-05-26 11:39 ` Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=348469548.73326@ustc.edu.cn \
--to=wfg@mail.ustc.edu.cn \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.