public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Wu Fengguang <wfg@mail.ustc.edu.cn>
To: Andrew Morton <akpm@osdl.org>
Cc: linux-kernel@vger.kernel.org, Wu Fengguang <wfg@mail.ustc.edu.cn>
Subject: [PATCH 14/32] readahead: state based method - routines
Date: Sat, 27 May 2006 23:49:03 +0800	[thread overview]
Message-ID: <348745091.16246@ustc.edu.cn> (raw)
Message-ID: <20060527155132.649338979@localhost.localdomain> (raw)
In-Reply-To: 20060527154849.927021763@localhost.localdomain

[-- Attachment #1: readahead-method-stateful-routines.patch --]
[-- Type: text/plain, Size: 9428 bytes --]

Extend struct file_ra_state to support the adaptive read-ahead logic.
Also define some helpers for it.

Signed-off-by: Wu Fengguang <wfg@mail.ustc.edu.cn>
---

 include/linux/fs.h |   74 +++++++++++++++++---
 mm/readahead.c     |  189 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 251 insertions(+), 12 deletions(-)

--- linux-2.6.17-rc4-mm3.orig/include/linux/fs.h
+++ linux-2.6.17-rc4-mm3/include/linux/fs.h
@@ -613,21 +613,75 @@ struct fown_struct {
 
 /*
  * Track a single file's readahead state
+ *
+ * Diagram for the adaptive readahead logic:
+ *
+ *  |--------- old chunk ------->|-------------- new chunk -------------->|
+ *  +----------------------------+----------------------------------------+
+ *  |               #            |                  #                     |
+ *  +----------------------------+----------------------------------------+
+ *                  ^            ^                  ^                     ^
+ *  file_ra_state.la_index    .ra_index   .lookahead_index      .readahead_index
+ *
+ * Common used deduced sizes:
+ *                               |----------- readahead size ------------>|
+ *  +----------------------------+----------------------------------------+
+ *  |               #            |                  #                     |
+ *  +----------------------------+----------------------------------------+
+ *                  |------- invoke interval ------>|-- lookahead size -->|
  */
 struct file_ra_state {
-	unsigned long start;		/* Current window */
-	unsigned long size;
-	unsigned long flags;		/* ra flags RA_FLAG_xxx*/
-	unsigned long cache_hit;	/* cache hit count*/
-	unsigned long prev_page;	/* Cache last read() position */
-	unsigned long ahead_start;	/* Ahead window */
-	unsigned long ahead_size;
-	unsigned long ra_pages;		/* Maximum readahead window */
+	union {
+		struct { /* stock read-ahead */
+			unsigned long start;		/* Current window */
+			unsigned long size;
+			unsigned long ahead_start;	/* Ahead window */
+			unsigned long ahead_size;
+			unsigned long cache_hit;	/* cache hit count */
+		};
+#ifdef CONFIG_ADAPTIVE_READAHEAD
+		struct { /* adaptive read-ahead */
+			pgoff_t la_index;		/* old chunk */
+			pgoff_t ra_index;
+			pgoff_t lookahead_index;	/* new chunk */
+			pgoff_t readahead_index;
+
+			/*
+			 * Read-ahead hits.
+			 * 	i.e. # of distinct read-ahead pages accessed.
+			 *
+			 * What is a read-ahead sequence?
+			 * 	A collection of sequential read-ahead requests.
+			 * To put it simple:
+			 * 	Normally a seek starts a new sequence.
+			 */
+			u16	hit0;	/* for the current request */
+			u16	hit1;	/* for the current sequence */
+			u16	hit2;	/* for the previous sequence */
+			u16	hit3;	/* for the prev-prev sequence */
+
+			/*
+			 * Snapshot of the (node's) read-ahead aging value
+			 * on time of I/O submission.
+			 */
+			unsigned long age;
+		};
+#endif
+	};
+
+	/* mmap read-around */
 	unsigned long mmap_hit;		/* Cache hit stat for mmap accesses */
 	unsigned long mmap_miss;	/* Cache miss stat for mmap accesses */
+
+	unsigned long flags;	/* RA_FLAG_xxx | ra_class_old | ra_class_new */
+	unsigned long prev_page;	/* Cache last read() position */
+	unsigned long ra_pages;		/* Maximum readahead window */
 };
-#define RA_FLAG_MISS 0x01	/* a cache miss occured against this file */
-#define RA_FLAG_INCACHE 0x02	/* file is already in cache */
+#define RA_FLAG_MISS	(1UL<<31) /* a cache miss occured against this file */
+#define RA_FLAG_INCACHE	(1UL<<30) /* file is already in cache */
+#define RA_FLAG_MMAP		(1UL<<29) /* mmaped page access */
+#define RA_FLAG_NO_LOOKAHEAD	(1UL<<28) /* disable look-ahead */
+#define RA_FLAG_EOF		(1UL<<27) /* readahead hits EOF */
 
 struct file {
 	/*
--- linux-2.6.17-rc4-mm3.orig/mm/readahead.c
+++ linux-2.6.17-rc4-mm3/mm/readahead.c
@@ -817,6 +817,191 @@ static unsigned long node_readahead_agin
 }
 
 /*
+ * Some helpers for querying/building a read-ahead request.
+ *
+ * Diagram for some variable names used frequently:
+ *
+ *                                   |<------- la_size ------>|
+ *                  +-----------------------------------------+
+ *                  |                #                        |
+ *                  +-----------------------------------------+
+ *      ra_index -->|<---------------- ra_size -------------->|
+ *
+ */
+
+static enum ra_class ra_class_new(struct file_ra_state *ra)
+{
+	return ra->flags & RA_CLASS_MASK;
+}
+
+static inline enum ra_class ra_class_old(struct file_ra_state *ra)
+{
+	return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+}
+
+static unsigned long ra_readahead_size(struct file_ra_state *ra)
+{
+	return ra->readahead_index - ra->ra_index;
+}
+
+static unsigned long ra_lookahead_size(struct file_ra_state *ra)
+{
+	return ra->readahead_index - ra->lookahead_index;
+}
+
+static unsigned long ra_invoke_interval(struct file_ra_state *ra)
+{
+	return ra->lookahead_index - ra->la_index;
+}
+
+/*
+ * The read-ahead is deemed success if cache-hit-rate >= 1/readahead_hit_rate.
+ */
+static int ra_cache_hit_ok(struct file_ra_state *ra)
+{
+	return ra->hit0 * readahead_hit_rate >=
+					(ra->lookahead_index - ra->la_index);
+}
+
+/*
+ * Check if @index falls in the @ra request.
+ */
+static int ra_has_index(struct file_ra_state *ra, pgoff_t index)
+{
+	if (index < ra->la_index || index >= ra->readahead_index)
+		return 0;
+
+	if (index >= ra->ra_index)
+		return 1;
+	else
+		return -1;
+}
+
+/*
+ * Which method is issuing this read-ahead?
+ */
+static void ra_set_class(struct file_ra_state *ra,
+				enum ra_class ra_class)
+{
+	unsigned long flags_mask;
+	unsigned long flags;
+	unsigned long old_ra_class;
+
+	flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT));
+	flags = ra->flags & flags_mask;
+
+	old_ra_class = ra_class_new(ra) << RA_CLASS_SHIFT;
+
+	ra->flags = flags | old_ra_class | ra_class;
+
+	/*
+	 * Add request-hit up to sequence-hit and reset the former.
+	 */
+	ra->hit1 += ra->hit0;
+	ra->hit0 = 0;
+
+	/*
+	 * Manage the read-ahead sequences' hit counts.
+	 * 	- the stateful method continues any existing sequence;
+	 * 	- all other methods starts a new one.
+	 */
+	if (ra_class != RA_CLASS_STATE) {
+		ra->hit3 = ra->hit2;
+		ra->hit2 = ra->hit1;
+		ra->hit1 = 0;
+	}
+}
+
+/*
+ * Where is the old read-ahead and look-ahead?
+ */
+static void ra_set_index(struct file_ra_state *ra,
+				pgoff_t la_index, pgoff_t ra_index)
+{
+	ra->la_index = la_index;
+	ra->ra_index = ra_index;
+}
+
+/*
+ * Where is the new read-ahead and look-ahead?
+ */
+static void ra_set_size(struct file_ra_state *ra,
+				unsigned long ra_size, unsigned long la_size)
+{
+	ra->readahead_index = ra->ra_index + ra_size;
+	ra->lookahead_index = ra->readahead_index - la_size;
+}
+
+/*
+ * Submit IO for the read-ahead request in file_ra_state.
+ */
+static int ra_dispatch(struct file_ra_state *ra,
+			struct address_space *mapping, struct file *filp)
+{
+	unsigned long ra_size;
+	unsigned long la_size;
+	pgoff_t eof_index;
+	int actual;
+
+	eof_index = /* it's a past-the-end index! */
+		DIV_ROUND_UP(i_size_read(mapping->host), PAGE_CACHE_SIZE);
+
+	if (unlikely(ra->ra_index >= eof_index))
+		return 0;
+
+	/*
+	 * Snap to EOF, if the request
+	 * 	- crossed the EOF boundary;
+	 * 	- is close to EOF(explained below).
+	 *
+	 * Imagine a file sized 18 pages, and we dicided to read-ahead the
+	 * first 16 pages. It is highly possible that in the near future we
+	 * will have to do another read-ahead for the remaining 2 pages,
+	 * which is an unfavorable small I/O.
+	 *
+	 * So we prefer to take a bit risk to enlarge the current read-ahead,
+	 * to eliminate possible future small I/O.
+	 */
+	if (ra->readahead_index + ra_readahead_size(ra)/4 > eof_index) {
+		ra->readahead_index = eof_index;
+		if (ra->lookahead_index > eof_index)
+			ra->lookahead_index = eof_index;
+		ra->flags |= RA_FLAG_EOF;
+	}
+
+	/* Disable look-ahead for loopback file. */
+	if (unlikely(ra->flags & RA_FLAG_NO_LOOKAHEAD))
+		ra->lookahead_index = ra->readahead_index;
+
+	/* Take down the current read-ahead aging value. */
+	ra->age = node_readahead_aging();
+
+	ra_size = ra_readahead_size(ra);
+	la_size = ra_lookahead_size(ra);
+	actual = __do_page_cache_readahead(mapping, filp,
+					ra->ra_index, ra_size, la_size);
+
+#ifdef CONFIG_DEBUG_READAHEAD
+	if (ra->flags & RA_FLAG_MMAP)
+		ra_account(ra, RA_EVENT_READAHEAD_MMAP, actual);
+	if (ra->readahead_index == eof_index)
+		ra_account(ra, RA_EVENT_READAHEAD_EOF, actual);
+	if (la_size)
+		ra_account(ra, RA_EVENT_LOOKAHEAD, la_size);
+	if (ra_size > actual)
+		ra_account(ra, RA_EVENT_IO_CACHE_HIT, ra_size - actual);
+	ra_account(ra, RA_EVENT_READAHEAD, actual);
+
+	dprintk("readahead-%s(ino=%lu, index=%lu, ra=%lu+%lu-%lu) = %d\n",
+			ra_class_name[ra_class_new(ra)],
+			mapping->host->i_ino, ra->la_index,
+			ra->ra_index, ra_size, la_size, actual);
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+	return actual;
+}
+
+/*
  * ra_min is mainly determined by the size of cache memory. Reasonable?
  *
  * Table of concrete numbers for 4KB page size:
@@ -888,10 +1073,10 @@ static void ra_account(struct file_ra_st
 		return;
 
 	if (e == RA_EVENT_READAHEAD_HIT && pages < 0) {
-		c = (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK;
+		c = ra_class_old(ra);
 		pages = -pages;
 	} else if (ra)
-		c = ra->flags & RA_CLASS_MASK;
+		c = ra_class_new(ra);
 	else
 		c = RA_CLASS_NONE;
 

--

  parent reply	other threads:[~2006-05-27 15:54 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20060527154849.927021763@localhost.localdomain>
2006-05-27 15:48 ` [PATCH 00/32] Adaptive readahead V14 Wu Fengguang
2006-05-27 17:29   ` Michael Tokarev
     [not found]     ` <20060528120815.GB6478@mail.ustc.edu.cn>
2006-05-28 12:08       ` Wu Fengguang
2006-05-28 19:23         ` Michael Tokarev
     [not found]           ` <20060529030152.GA5994@mail.ustc.edu.cn>
2006-05-29  3:01             ` Wu Fengguang
2006-05-30  9:23             ` Jens Axboe
     [not found]               ` <20060530113221.GA8665@mail.ustc.edu.cn>
2006-05-30 11:32                 ` Wu Fengguang
2006-05-30 12:29                 ` Jens Axboe
     [not found]                   ` <20060530143417.GA9126@mail.ustc.edu.cn>
2006-05-30 14:34                     ` Wu Fengguang
     [not found] ` <20060527155125.911021581@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 01/32] readahead: kconfig options Wu Fengguang
     [not found] ` <20060527155127.522802387@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 04/32] mm: introduce PG_readahead Wu Fengguang
     [not found] ` <20060527155128.472551240@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 06/32] readahead: delay page release in do_generic_mapping_read() Wu Fengguang
     [not found] ` <20060527155129.001886224@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 07/32] readahead: insert cond_resched() calls Wu Fengguang
     [not found] ` <20060527155129.653903854@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 08/32] readahead: {MIN,MAX}_RA_PAGES Wu Fengguang
     [not found] ` <20060527155130.013773601@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 09/32] readahead: events accounting Wu Fengguang
     [not found] ` <20060527155130.538411854@localhost.localdomain>
2006-05-27 15:48   ` [PATCH 10/32] readahead: rescue_pages() Wu Fengguang
     [not found] ` <20060527155131.200177171@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 11/32] readahead: sysctl parameters Wu Fengguang
     [not found] ` <20060527155132.649338979@localhost.localdomain>
2006-05-27 15:49   ` Wu Fengguang [this message]
     [not found] ` <20060527155133.216888332@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 15/32] readahead: state based method Wu Fengguang
     [not found] ` <20060527155134.715578802@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 18/32] readahead: initial method - thrashing guard size Wu Fengguang
     [not found] ` <20060527155135.584918734@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 20/32] readahead: initial method - user recommended size Wu Fengguang
     [not found] ` <20060527155136.503037461@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 22/32] readahead: backward prefetching method Wu Fengguang
     [not found] ` <20060527155137.552915509@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 24/32] readahead: thrashing recovery method Wu Fengguang
2006-05-27 22:04     ` [PATCH 23/32] readahead: seeking reads method Ingo Oeser
     [not found] ` <20060527155138.046726658@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 25/32] readahead: call scheme Wu Fengguang
     [not found] ` <20060527155138.454809673@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 26/32] readahead: laptop mode Wu Fengguang
     [not found] ` <20060527155140.035991503@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 27/32] readahead: loop case Wu Fengguang
     [not found] ` <20060527155141.697607086@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 30/32] readahead: debug radix tree new functions Wu Fengguang
     [not found] ` <20060527155142.129761018@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 31/32] readahead: debug traces showing accessed file names Wu Fengguang
     [not found] ` <20060527155142.715530234@localhost.localdomain>
2006-05-27 15:49   ` [PATCH 32/32] readahead: debug traces showing read patterns Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=348745091.16246@ustc.edu.cn \
    --to=wfg@mail.ustc.edu.cn \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox