public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Wu Fengguang <wfg@mail.ustc.edu.cn>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Wu Fengguang <wfg@mail.ustc.edu.cn>
Subject: [PATCH 26/33] readahead: call scheme
Date: Fri, 26 May 2006 19:39:32 +0800	[thread overview]
Message-ID: <348644389.06434@ustc.edu.cn> (raw)
Message-ID: <20060526115314.929319286@localhost.localdomain> (raw)
In-Reply-To: 20060526113906.084341801@localhost.localdomain

[-- Attachment #1: readahead-call-scheme.patch --]
[-- Type: text/plain, Size: 9465 bytes --]

The read-ahead logic is called when a read hits
        - a PG_readahead marked page;
        - a non-present page.

ra.prev_page should be properly set up on entry, and readahead_cache_hit()
should be called on every page reference to maintain the cache_hits counter.

This call scheme achieves the following goals:
        - makes all stateful/stateless methods happy;
        - eliminates the cache hit problem naturally;
        - lives in harmony with application managed read-aheads via
          fadvise/madvise.

Signed-off-by: Wu Fengguang <wfg@mail.ustc.edu.cn>
---

 include/linux/mm.h |    6 ++
 mm/filemap.c       |   51 ++++++++++++++++-
 mm/readahead.c     |  152 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 205 insertions(+), 4 deletions(-)

--- linux-2.6.17-rc4-mm3.orig/include/linux/mm.h
+++ linux-2.6.17-rc4-mm3/include/linux/mm.h
@@ -1033,6 +1033,12 @@ void handle_ra_miss(struct address_space
 		    struct file_ra_state *ra, pgoff_t offset);
 unsigned long max_sane_readahead(unsigned long nr);
 void fastcall readahead_close(struct file *file);
+unsigned long
+page_cache_readahead_adaptive(struct address_space *mapping,
+			struct file_ra_state *ra, struct file *filp,
+			struct page *prev_page, struct page *page,
+			pgoff_t first_index, pgoff_t index, pgoff_t last_index);
+void fastcall readahead_cache_hit(struct file_ra_state *ra, struct page *page);
 
 #ifdef CONFIG_ADAPTIVE_READAHEAD
 extern int readahead_ratio;
--- linux-2.6.17-rc4-mm3.orig/mm/filemap.c
+++ linux-2.6.17-rc4-mm3/mm/filemap.c
@@ -847,14 +847,32 @@ void do_generic_mapping_read(struct addr
 		nr = nr - offset;
 
 		cond_resched();
-		if (index == next_index)
+
+		if (!prefer_adaptive_readahead() && index == next_index)
 			next_index = page_cache_readahead(mapping, &ra, filp,
 					index, last_index - index);
 
 find_page:
 		page = find_get_page(mapping, index);
+		if (prefer_adaptive_readahead()) {
+			if (unlikely(page == NULL)) {
+				ra.prev_page = prev_index;
+				page_cache_readahead_adaptive(mapping, &ra,
+						filp, prev_page, NULL,
+						*ppos >> PAGE_CACHE_SHIFT,
+						index, last_index);
+				page = find_get_page(mapping, index);
+			} else if (PageReadahead(page)) {
+				ra.prev_page = prev_index;
+				page_cache_readahead_adaptive(mapping, &ra,
+						filp, prev_page, page,
+						*ppos >> PAGE_CACHE_SHIFT,
+						index, last_index);
+			}
+		}
 		if (unlikely(page == NULL)) {
-			handle_ra_miss(mapping, &ra, index);
+			if (!prefer_adaptive_readahead())
+				handle_ra_miss(mapping, &ra, index);
 			goto no_cached_page;
 		}
 
@@ -862,6 +880,9 @@ find_page:
 			page_cache_release(prev_page);
 		prev_page = page;
 
+		if (prefer_adaptive_readahead())
+			readahead_cache_hit(&ra, page);
+
 		if (!PageUptodate(page))
 			goto page_not_up_to_date;
 page_ok:
@@ -1005,6 +1026,8 @@ no_cached_page:
 
 out:
 	*_ra = ra;
+	if (prefer_adaptive_readahead())
+		_ra->prev_page = prev_index;
 
 	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
 	if (cached_page)
@@ -1290,6 +1313,7 @@ struct page *filemap_nopage(struct vm_ar
 	unsigned long size, pgoff;
 	int did_readaround = 0, majmin = VM_FAULT_MINOR;
 
+	ra->flags |= RA_FLAG_MMAP;
 	pgoff = ((address-area->vm_start) >> PAGE_CACHE_SHIFT) + area->vm_pgoff;
 
 retry_all:
@@ -1307,19 +1331,33 @@ retry_all:
 	 *
 	 * For sequential accesses, we use the generic readahead logic.
 	 */
-	if (VM_SequentialReadHint(area))
+	if (!prefer_adaptive_readahead() && VM_SequentialReadHint(area))
 		page_cache_readahead(mapping, ra, file, pgoff, 1);
 
+
 	/*
 	 * Do we have something in the page cache already?
 	 */
 retry_find:
 	page = find_get_page(mapping, pgoff);
+	if (prefer_adaptive_readahead() && VM_SequentialReadHint(area)) {
+		if (!page) {
+			page_cache_readahead_adaptive(mapping, ra,
+						file, NULL, NULL,
+						pgoff, pgoff, pgoff + 1);
+			page = find_get_page(mapping, pgoff);
+		} else if (PageReadahead(page)) {
+			page_cache_readahead_adaptive(mapping, ra,
+						file, NULL, page,
+						pgoff, pgoff, pgoff + 1);
+		}
+	}
 	if (!page) {
 		unsigned long ra_pages;
 
 		if (VM_SequentialReadHint(area)) {
-			handle_ra_miss(mapping, ra, pgoff);
+			if (!prefer_adaptive_readahead())
+				handle_ra_miss(mapping, ra, pgoff);
 			goto no_cached_page;
 		}
 		ra->mmap_miss++;
@@ -1356,6 +1394,9 @@ retry_find:
 	if (!did_readaround)
 		ra->mmap_hit++;
 
+	if (prefer_adaptive_readahead())
+		readahead_cache_hit(ra, page);
+
 	/*
 	 * Ok, found a page in the page cache, now we need to check
 	 * that it's up-to-date.
@@ -1370,6 +1411,8 @@ success:
 	mark_page_accessed(page);
 	if (type)
 		*type = majmin;
+	if (prefer_adaptive_readahead())
+		ra->prev_page = page->index;
 	return page;
 
 outside_data_content:
--- linux-2.6.17-rc4-mm3.orig/mm/readahead.c
+++ linux-2.6.17-rc4-mm3/mm/readahead.c
@@ -1717,6 +1717,158 @@ static inline void get_readahead_bounds(
 					PAGES_KB(128)), *ra_max / 2);
 }
 
+/**
+ * page_cache_readahead_adaptive - thrashing safe adaptive read-ahead
+ * @mapping, @ra, @filp: the same as page_cache_readahead()
+ * @prev_page: the page at @index-1, may be NULL to let the function find it
+ * @page: the page at @index, or NULL if non-present
+ * @begin_index, @index, @end_index: offsets into @mapping
+ *		[@begin_index, @end_index) is the read the caller is performing
+ *		@index indicates the page to be read now
+ *
+ * page_cache_readahead_adaptive() is the entry point of the adaptive
+ * read-ahead logic. It tries a set of methods in turn to determine the
+ * appropriate readahead action and submits the readahead I/O.
+ *
+ * The caller must set ra->prev_page to the index of the previously accessed
+ * page, and call this function under either of two conditions:
+ * 1. @page == NULL
+ *    A cache miss happened; some pages have to be read in.
+ * 2. @page != NULL && PageReadahead(@page)
+ *    A look-ahead mark was encountered. It is set by a previous read-ahead
+ *    invocation to give this function a chance to check up and start the
+ *    next read-ahead in advance.
+ */
+unsigned long
+page_cache_readahead_adaptive(struct address_space *mapping,
+			struct file_ra_state *ra, struct file *filp,
+			struct page *prev_page, struct page *page,
+			pgoff_t begin_index, pgoff_t index, pgoff_t end_index)
+{
+	unsigned long size;
+	unsigned long ra_min;
+	unsigned long ra_max;
+	int ret;
+
+	might_sleep();
+
+	if (page) {
+		if(!TestClearPageReadahead(page))
+			return 0;
+		if (bdi_read_congested(mapping->backing_dev_info)) {
+			ra_account(ra, RA_EVENT_IO_CONGESTION,
+							end_index - index);
+			return 0;
+		}
+	}
+
+	if (page)
+		ra_account(ra, RA_EVENT_LOOKAHEAD_HIT,
+				ra->readahead_index - ra->lookahead_index);
+	else if (index)
+		ra_account(ra, RA_EVENT_CACHE_MISS, end_index - begin_index);
+
+	size = end_index - index;
+	get_readahead_bounds(ra, &ra_min, &ra_max);
+
+	/* Readahead disabled? Skip all methods and go straight to the read. */
+	if (unlikely(!ra_max || !readahead_ratio)) {
+		size = max_sane_readahead(size);
+		goto readit;
+	}
+
+	/*
+	 * Start of file.
+	 */
+	if (index == 0)
+		return initial_readahead(mapping, filp, ra, size);
+
+	/*
+	 * State based sequential read-ahead.
+	 */
+	if (!debug_option(disable_stateful_method) &&
+			index == ra->lookahead_index && ra_cache_hit_ok(ra))
+		return state_based_readahead(mapping, filp, ra, page,
+							index, size, ra_max);
+
+	/*
+	 * Recover from possible thrashing.
+	 */
+	if (!page && index == ra->prev_page + 1 && ra_has_index(ra, index))
+		return thrashing_recovery_readahead(mapping, filp, ra,
+								index, ra_max);
+
+	/*
+	 * Backward read-ahead.
+	 */
+	if (!page && begin_index == index &&
+				try_read_backward(ra, index, size, ra_max))
+		return ra_dispatch(ra, mapping, filp);
+
+	/*
+	 * Context based sequential read-ahead.
+	 */
+	ret = try_context_based_readahead(mapping, ra, prev_page, page,
+							index, ra_min, ra_max);
+	if (ret > 0)
+		return ra_dispatch(ra, mapping, filp);
+	if (ret < 0)
+		return 0;
+
+	/* Look-ahead hit but no method acted: account it, submit nothing. */
+	if (page) {
+		ra_account(ra, RA_EVENT_LOOKAHEAD_NOACTION,
+						ra->readahead_index - index);
+		return 0;
+	}
+
+	/*
+	 * Random read that follows a sequential one.
+	 */
+	if (try_readahead_on_seek(ra, index, size, ra_max))
+		return ra_dispatch(ra, mapping, filp);
+
+	/*
+	 * Random read: cap the request at ra_max.
+	 */
+	if (size > ra_max)
+		size = ra_max;
+
+readit:
+	size = __do_page_cache_readahead(mapping, filp, index, size, 0);
+
+	ra_account(ra, RA_EVENT_RANDOM_READ, size);
+	dprintk("random_read(ino=%lu, pages=%lu, index=%lu-%lu-%lu) = %lu\n",
+			mapping->host->i_ino, mapping->nrpages,
+			begin_index, index, end_index, size);
+
+	return size;
+}
+
+/**
+ * readahead_cache_hit - adaptive read-ahead feedback function
+ * @ra: file_ra_state which holds the readahead state
+ * @page: the page just accessed
+ *
+ * readahead_cache_hit() is the feedback route of the adaptive read-ahead
+ * logic. It must be called on every access to the read-ahead pages.
+ */
+void fastcall readahead_cache_hit(struct file_ra_state *ra, struct page *page)
+{
+	if (!PageUptodate(page))
+		ra_account(ra, RA_EVENT_IO_BLOCK, 1);
+
+	if (!ra_has_index(ra, page->index))
+		return;
+
+	ra->cache_hits++;
+
+	if (page->index >= ra->ra_index)
+		ra_account(ra, RA_EVENT_READAHEAD_HIT, 1);
+	else
+		ra_account(ra, RA_EVENT_READAHEAD_HIT, -1);
+}
+
 /*
  * When closing a normal readonly file,
  * 	- on cache hit:  increase `backing_dev_info.ra_expect_bytes' slowly;

--

  parent reply	other threads:[~2006-05-26 11:53 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20060526113906.084341801@localhost.localdomain>
     [not found] ` <20060526115259.223408850@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 02/33] radixtree: introduce __radix_tree_lookup_parent() Wu Fengguang
2006-05-26 13:56     ` Christoph Lameter
     [not found]       ` <20060526140951.GA13954@mail.ustc.edu.cn>
2006-05-26 14:09         ` Wu Fengguang
     [not found] ` <20060526115259.809011306@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 03/33] radixtree: introduce radix_tree_scan_hole[_backward]() Wu Fengguang
     [not found] ` <20060526115300.609227164@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 04/33] mm: introduce probe_pages() Wu Fengguang
     [not found] ` <20060526115301.640751284@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 06/33] readahead: add look-ahead support to __do_page_cache_readahead() Wu Fengguang
     [not found] ` <20060526115302.278500703@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 07/33] readahead: delay page release in do_generic_mapping_read() Wu Fengguang
     [not found] ` <20060526115303.499451943@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 09/33] readahead: {MIN,MAX}_RA_PAGES Wu Fengguang
     [not found] ` <20060526115304.094503892@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 10/33] readahead: events accounting Wu Fengguang
     [not found] ` <20060526115304.821789643@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 11/33] readahead: rescue_pages() Wu Fengguang
     [not found] ` <20060526115305.437903777@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 12/33] readahead: sysctl parameters Wu Fengguang
     [not found] ` <20060526115306.535453644@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 14/33] readahead: state based method - aging accounting Wu Fengguang
     [not found] ` <20060526115307.794859372@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 16/33] readahead: state based method Wu Fengguang
     [not found] ` <20060526115308.522890112@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 17/33] readahead: context " Wu Fengguang
     [not found] ` <20060526115309.581525784@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 19/33] readahead: initial method - thrashing guard size Wu Fengguang
     [not found] ` <20060526115310.948231030@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 21/33] readahead: initial method - user recommended size Wu Fengguang
     [not found] ` <20060526115311.541535720@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 22/33] readahead: initial method Wu Fengguang
     [not found] ` <20060526115312.145248016@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 23/33] readahead: backward prefetching method Wu Fengguang
     [not found] ` <20060526115313.491576583@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 25/33] readahead: thrashing recovery method Wu Fengguang
     [not found] ` <20060526115314.929319286@localhost.localdomain>
2006-05-26 11:39   ` Wu Fengguang [this message]
     [not found] ` <20060526115315.823465555@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 28/33] readahead: loop case Wu Fengguang
     [not found] ` <20060526115316.335626686@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 29/33] readahead: nfsd case Wu Fengguang
     [not found] ` <20060526115316.925345724@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 30/33] readahead: turn on by default Wu Fengguang
     [not found] ` <20060526115317.663871267@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 31/33] readahead: debug radix tree new functions Wu Fengguang
     [not found] ` <20060526115318.181350700@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 32/33] readahead: debug traces showing accessed file names Wu Fengguang
     [not found] ` <20060526115318.520512078@localhost.localdomain>
2006-05-26 11:39   ` [PATCH 33/33] readahead: debug traces showing read patterns Wu Fengguang
     [not found] <20060524111246.420010595@localhost.localdomain>
     [not found] ` <20060524111910.207894375@localhost.localdomain>
2006-05-24 11:13   ` [PATCH 26/33] readahead: call scheme Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=348644389.06434@ustc.edu.cn \
    --to=wfg@mail.ustc.edu.cn \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox