From: Wu Fengguang <wfg@mail.ustc.edu.cn>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Wu Fengguang <wfg@mail.ustc.edu.cn>
Subject: [PATCH 14/32] readahead: state based method - routines
Date: Sat, 27 May 2006 23:49:03 +0800 [thread overview]
Message-ID: <348745091.16246@ustc.edu.cn> (raw)
Message-ID: <20060527155132.649338979@localhost.localdomain> (raw)
In-Reply-To: 20060527154849.927021763@localhost.localdomain
[-- Attachment #1: readahead-method-stateful-routines.patch --]
[-- Type: text/plain, Size: 9428 bytes --]
Extend struct file_ra_state to support the adaptive read-ahead logic.
Also define some helpers for it.
Signed-off-by: Wu Fengguang <wfg@mail.ustc.edu.cn>
---
include/linux/fs.h | 74 +++++++++++++++++---
mm/readahead.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 251 insertions(+), 12 deletions(-)
--- linux-2.6.17-rc4-mm3.orig/include/linux/fs.h
+++ linux-2.6.17-rc4-mm3/include/linux/fs.h
@@ -613,21 +613,75 @@ struct fown_struct {
/*
* Track a single file's readahead state
+ *
+ * Diagram for the adaptive readahead logic:
+ *
+ * |--------- old chunk ------->|-------------- new chunk -------------->|
+ * +----------------------------+----------------------------------------+
+ * | # | # |
+ * +----------------------------+----------------------------------------+
+ * ^ ^ ^ ^
+ * file_ra_state.la_index .ra_index .lookahead_index .readahead_index
+ *
+ * Common used deduced sizes:
+ * |----------- readahead size ------------>|
+ * +----------------------------+----------------------------------------+
+ * | # | # |
+ * +----------------------------+----------------------------------------+
+ * |------- invoke interval ------>|-- lookahead size -->|
*/
struct file_ra_state {
- unsigned long start; /* Current window */
- unsigned long size;
- unsigned long flags; /* ra flags RA_FLAG_xxx*/
- unsigned long cache_hit; /* cache hit count*/
- unsigned long prev_page; /* Cache last read() position */
- unsigned long ahead_start; /* Ahead window */
- unsigned long ahead_size;
- unsigned long ra_pages; /* Maximum readahead window */
+ union {
+ struct { /* stock read-ahead */
+ unsigned long start; /* Current window */
+ unsigned long size;
+ unsigned long ahead_start; /* Ahead window */
+ unsigned long ahead_size;
+ unsigned long cache_hit; /* cache hit count */
+ };
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+ struct { /* adaptive read-ahead */
+ pgoff_t la_index; /* old chunk */
+ pgoff_t ra_index;
+ pgoff_t lookahead_index; /* new chunk */
+ pgoff_t readahead_index;
+
+ /*
+ * Read-ahead hits.
+ * i.e. # of distinct read-ahead pages accessed.
+ *
+ * What is a read-ahead sequence?
+ * A collection of sequential read-ahead requests.
+ * To put it simple:
+ * Normally a seek starts a new sequence.
+ */
+ u16 hit0; /* for the current request */
+ u16 hit1; /* for the current sequence */
+ u16 hit2; /* for the previous sequence */
+ u16 hit3; /* for the prev-prev sequence */
+
+ /*
+ * Snapshot of the (node's) read-ahead aging value
+ * on time of I/O submission.
+ */
+ unsigned long age;
+ };
+#endif
+ };
+
+ /* mmap read-around */
unsigned long mmap_hit; /* Cache hit stat for mmap accesses */
unsigned long mmap_miss; /* Cache miss stat for mmap accesses */
+
+ unsigned long flags; /* RA_FLAG_xxx | ra_class_old | ra_class_new */
+ unsigned long prev_page; /* Cache last read() position */
+ unsigned long ra_pages; /* Maximum readahead window */
};
-#define RA_FLAG_MISS 0x01 /* a cache miss occured against this file */
-#define RA_FLAG_INCACHE 0x02 /* file is already in cache */
+#define RA_FLAG_MISS (1UL<<31) /* a cache miss occured against this file */
+#define RA_FLAG_INCACHE (1UL<<30) /* file is already in cache */
+#define RA_FLAG_MMAP (1UL<<29) /* mmaped page access */
+#define RA_FLAG_NO_LOOKAHEAD (1UL<<28) /* disable look-ahead */
+#define RA_FLAG_EOF (1UL<<27) /* readahead hits EOF */
struct file {
/*
--- linux-2.6.17-rc4-mm3.orig/mm/readahead.c
+++ linux-2.6.17-rc4-mm3/mm/readahead.c
@@ -817,6 +817,191 @@ static unsigned long node_readahead_agin
}
/*
+ * Some helpers for querying/building a read-ahead request.
+ *
+ * Diagram for some variable names used frequently:
+ *
+ * |<------- la_size ------>|
+ * +-----------------------------------------+
+ * | # |
+ * +-----------------------------------------+
+ * ra_index -->|<---------------- ra_size -------------->|
+ *
+ */
+
+static enum ra_class ra_class_new(struct file_ra_state *ra)
+{
+ return ra->flags & RA_CLASS_MASK;
+}
+
+static inline enum ra_class ra_class_old(struct file_ra_state *ra)
+{
+ return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+}
+
+static unsigned long ra_readahead_size(struct file_ra_state *ra)
+{
+ return ra->readahead_index - ra->ra_index;
+}
+
+static unsigned long ra_lookahead_size(struct file_ra_state *ra)
+{
+ return ra->readahead_index - ra->lookahead_index;
+}
+
+static unsigned long ra_invoke_interval(struct file_ra_state *ra)
+{
+ return ra->lookahead_index - ra->la_index;
+}
+
+/*
+ * The read-ahead is deemed success if cache-hit-rate >= 1/readahead_hit_rate.
+ */
+static int ra_cache_hit_ok(struct file_ra_state *ra)
+{
+ return ra->hit0 * readahead_hit_rate >=
+ (ra->lookahead_index - ra->la_index);
+}
+
+/*
+ * Check if @index falls in the @ra request.
+ */
+static int ra_has_index(struct file_ra_state *ra, pgoff_t index)
+{
+ if (index < ra->la_index || index >= ra->readahead_index)
+ return 0;
+
+ if (index >= ra->ra_index)
+ return 1;
+ else
+ return -1;
+}
+
+/*
+ * Which method is issuing this read-ahead?
+ */
+static void ra_set_class(struct file_ra_state *ra,
+ enum ra_class ra_class)
+{
+ unsigned long flags_mask;
+ unsigned long flags;
+ unsigned long old_ra_class;
+
+ flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT));
+ flags = ra->flags & flags_mask;
+
+ old_ra_class = ra_class_new(ra) << RA_CLASS_SHIFT;
+
+ ra->flags = flags | old_ra_class | ra_class;
+
+ /*
+ * Add request-hit up to sequence-hit and reset the former.
+ */
+ ra->hit1 += ra->hit0;
+ ra->hit0 = 0;
+
+ /*
+ * Manage the read-ahead sequences' hit counts.
+ * - the stateful method continues any existing sequence;
+ * - all other methods starts a new one.
+ */
+ if (ra_class != RA_CLASS_STATE) {
+ ra->hit3 = ra->hit2;
+ ra->hit2 = ra->hit1;
+ ra->hit1 = 0;
+ }
+}
+
+/*
+ * Where is the old read-ahead and look-ahead?
+ */
+static void ra_set_index(struct file_ra_state *ra,
+ pgoff_t la_index, pgoff_t ra_index)
+{
+ ra->la_index = la_index;
+ ra->ra_index = ra_index;
+}
+
+/*
+ * Where is the new read-ahead and look-ahead?
+ */
+static void ra_set_size(struct file_ra_state *ra,
+ unsigned long ra_size, unsigned long la_size)
+{
+ ra->readahead_index = ra->ra_index + ra_size;
+ ra->lookahead_index = ra->readahead_index - la_size;
+}
+
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ */
+static int ra_dispatch(struct file_ra_state *ra,
+ struct address_space *mapping, struct file *filp)
+{
+ unsigned long ra_size;
+ unsigned long la_size;
+ pgoff_t eof_index;
+ int actual;
+
+ eof_index = /* it's a past-the-end index! */
+ DIV_ROUND_UP(i_size_read(mapping->host), PAGE_CACHE_SIZE);
+
+ if (unlikely(ra->ra_index >= eof_index))
+ return 0;
+
+ /*
+ * Snap to EOF, if the request
+ * - crossed the EOF boundary;
+ * - is close to EOF(explained below).
+ *
+ * Imagine a file sized 18 pages, and we dicided to read-ahead the
+ * first 16 pages. It is highly possible that in the near future we
+ * will have to do another read-ahead for the remaining 2 pages,
+ * which is an unfavorable small I/O.
+ *
+ * So we prefer to take a bit risk to enlarge the current read-ahead,
+ * to eliminate possible future small I/O.
+ */
+ if (ra->readahead_index + ra_readahead_size(ra)/4 > eof_index) {
+ ra->readahead_index = eof_index;
+ if (ra->lookahead_index > eof_index)
+ ra->lookahead_index = eof_index;
+ ra->flags |= RA_FLAG_EOF;
+ }
+
+ /* Disable look-ahead for loopback file. */
+ if (unlikely(ra->flags & RA_FLAG_NO_LOOKAHEAD))
+ ra->lookahead_index = ra->readahead_index;
+
+ /* Take down the current read-ahead aging value. */
+ ra->age = node_readahead_aging();
+
+ ra_size = ra_readahead_size(ra);
+ la_size = ra_lookahead_size(ra);
+ actual = __do_page_cache_readahead(mapping, filp,
+ ra->ra_index, ra_size, la_size);
+
+#ifdef CONFIG_DEBUG_READAHEAD
+ if (ra->flags & RA_FLAG_MMAP)
+ ra_account(ra, RA_EVENT_READAHEAD_MMAP, actual);
+ if (ra->readahead_index == eof_index)
+ ra_account(ra, RA_EVENT_READAHEAD_EOF, actual);
+ if (la_size)
+ ra_account(ra, RA_EVENT_LOOKAHEAD, la_size);
+ if (ra_size > actual)
+ ra_account(ra, RA_EVENT_IO_CACHE_HIT, ra_size - actual);
+ ra_account(ra, RA_EVENT_READAHEAD, actual);
+
+ dprintk("readahead-%s(ino=%lu, index=%lu, ra=%lu+%lu-%lu) = %d\n",
+ ra_class_name[ra_class_new(ra)],
+ mapping->host->i_ino, ra->la_index,
+ ra->ra_index, ra_size, la_size, actual);
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+ return actual;
+}
+
+/*
* ra_min is mainly determined by the size of cache memory. Reasonable?
*
* Table of concrete numbers for 4KB page size:
@@ -888,10 +1073,10 @@ static void ra_account(struct file_ra_st
return;
if (e == RA_EVENT_READAHEAD_HIT && pages < 0) {
- c = (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+ c = ra_class_old(ra);
pages = -pages;
} else if (ra)
- c = ra->flags & RA_CLASS_MASK;
+ c = ra_class_new(ra);
else
c = RA_CLASS_NONE;
--
next prev parent reply other threads:[~2006-05-27 15:54 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <20060527154849.927021763@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 00/32] Adaptive readahead V14 Wu Fengguang
2006-05-27 17:29 ` Michael Tokarev
[not found] ` <20060528120815.GB6478@mail.ustc.edu.cn>
2006-05-28 12:08 ` Wu Fengguang
2006-05-28 19:23 ` Michael Tokarev
[not found] ` <20060529030152.GA5994@mail.ustc.edu.cn>
2006-05-29 3:01 ` Wu Fengguang
2006-05-30 9:23 ` Jens Axboe
[not found] ` <20060530113221.GA8665@mail.ustc.edu.cn>
2006-05-30 11:32 ` Wu Fengguang
2006-05-30 12:29 ` Jens Axboe
[not found] ` <20060530143417.GA9126@mail.ustc.edu.cn>
2006-05-30 14:34 ` Wu Fengguang
[not found] ` <20060527155125.911021581@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 01/32] readahead: kconfig options Wu Fengguang
[not found] ` <20060527155127.522802387@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 04/32] mm: introduce PG_readahead Wu Fengguang
[not found] ` <20060527155128.472551240@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 06/32] readahead: delay page release in do_generic_mapping_read() Wu Fengguang
[not found] ` <20060527155129.001886224@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 07/32] readahead: insert cond_resched() calls Wu Fengguang
[not found] ` <20060527155129.653903854@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 08/32] readahead: {MIN,MAX}_RA_PAGES Wu Fengguang
[not found] ` <20060527155130.013773601@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 09/32] readahead: events accounting Wu Fengguang
[not found] ` <20060527155130.538411854@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 10/32] readahead: rescue_pages() Wu Fengguang
[not found] ` <20060527155131.200177171@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 11/32] readahead: sysctl parameters Wu Fengguang
[not found] ` <20060527155132.649338979@localhost.localdomain>
2006-05-27 15:49 ` Wu Fengguang [this message]
[not found] ` <20060527155133.216888332@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 15/32] readahead: state based method Wu Fengguang
[not found] ` <20060527155134.715578802@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 18/32] readahead: initial method - thrashing guard size Wu Fengguang
[not found] ` <20060527155135.584918734@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 20/32] readahead: initial method - user recommended size Wu Fengguang
[not found] ` <20060527155136.503037461@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 22/32] readahead: backward prefetching method Wu Fengguang
[not found] ` <20060527155137.552915509@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 24/32] readahead: thrashing recovery method Wu Fengguang
2006-05-27 22:04 ` [PATCH 23/32] readahead: seeking reads method Ingo Oeser
[not found] ` <20060527155138.046726658@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 25/32] readahead: call scheme Wu Fengguang
[not found] ` <20060527155138.454809673@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 26/32] readahead: laptop mode Wu Fengguang
[not found] ` <20060527155140.035991503@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 27/32] readahead: loop case Wu Fengguang
[not found] ` <20060527155141.697607086@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 30/32] readahead: debug radix tree new functions Wu Fengguang
[not found] ` <20060527155142.129761018@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 31/32] readahead: debug traces showing accessed file names Wu Fengguang
[not found] ` <20060527155142.715530234@localhost.localdomain>
2006-05-27 15:49 ` [PATCH 32/32] readahead: debug traces showing read patterns Wu Fengguang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=348745091.16246@ustc.edu.cn \
--to=wfg@mail.ustc.edu.cn \
--cc=akpm@osdl.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox