linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <jens.axboe@oracle.com>, Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Wu Fengguang <fengguang.wu@intel.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: Olivier Galibert <galibert@pobox.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Nick Piggin <npiggin@suse.de>
Cc: Linux Memory Management List <linux-mm@kvack.org>
Cc: <linux-fsdevel@vger.kernel.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 08/15] readahead: record readahead patterns
Date: Wed, 24 Feb 2010 11:10:09 +0800	[thread overview]
Message-ID: <20100224031054.734136802@intel.com> (raw)
In-Reply-To: 20100224031001.026464755@intel.com

[-- Attachment #1: readahead-tracepoints.patch --]
[-- Type: text/plain, Size: 6387 bytes --]

Record the readahead pattern in ra_flags. This info can be examined by
users via the readahead tracing/stats interfaces.

Currently 7 patterns are defined:

      	pattern			readahead for
-----------------------------------------------------------
	RA_PATTERN_INITIAL	start-of-file/oversize read
	RA_PATTERN_SUBSEQUENT	trivial     sequential read
	RA_PATTERN_CONTEXT	interleaved sequential read
	RA_PATTERN_THRASH	thrashed    sequential read
	RA_PATTERN_MMAP_AROUND	mmap fault
	RA_PATTERN_FADVISE	posix_fadvise()
	RA_PATTERN_RANDOM	random read

CC: Ingo Molnar <mingo@elte.hu> 
CC: Jens Axboe <jens.axboe@oracle.com> 
CC: Peter Zijlstra <a.p.zijlstra@chello.nl> 
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
 include/linux/fs.h |   32 ++++++++++++++++++++++++++++++++
 include/linux/mm.h |    4 +++-
 mm/filemap.c       |    9 +++++++--
 mm/readahead.c     |   17 +++++++++++++----
 4 files changed, 55 insertions(+), 7 deletions(-)

--- linux.orig/include/linux/fs.h	2010-02-24 10:44:44.000000000 +0800
+++ linux/include/linux/fs.h	2010-02-24 10:44:45.000000000 +0800
@@ -894,8 +894,40 @@ struct file_ra_state {
 };
 
 /* ra_flags bits */
+#define READAHEAD_PATTERN_SHIFT	20
+#define READAHEAD_PATTERN	0x00f00000
 #define	READAHEAD_MMAP_MISS	0x00000fff /* cache misses for mmap access */
 #define READAHEAD_THRASHED	0x10000000
+#define	READAHEAD_MMAP		0x20000000
+
+/*
+ * Which policy makes decision to do the current read-ahead IO?
+ */
+enum readahead_pattern {
+	RA_PATTERN_INITIAL,
+	RA_PATTERN_SUBSEQUENT,
+	RA_PATTERN_CONTEXT,
+	RA_PATTERN_THRASH,
+	RA_PATTERN_MMAP_AROUND,
+	RA_PATTERN_FADVISE,
+	RA_PATTERN_RANDOM,
+	RA_PATTERN_ALL,		/* for summary stats */
+	RA_PATTERN_MAX
+};
+
+static inline int ra_pattern(int ra_flags)
+{
+	int pattern = (ra_flags & READAHEAD_PATTERN)
+			       >> READAHEAD_PATTERN_SHIFT;
+
+	return min(pattern, RA_PATTERN_ALL);
+}
+
+static inline void ra_set_pattern(struct file_ra_state *ra, int pattern)
+{
+	ra->ra_flags = (ra->ra_flags & ~READAHEAD_PATTERN) |
+			    (pattern << READAHEAD_PATTERN_SHIFT);
+}
 
 /*
  * Don't do ra_flags++ directly to avoid possible overflow:
--- linux.orig/mm/readahead.c	2010-02-24 10:44:44.000000000 +0800
+++ linux/mm/readahead.c	2010-02-24 10:44:45.000000000 +0800
@@ -339,7 +339,10 @@ unsigned long max_sane_readahead(unsigne
  * Submit IO for the read-ahead request in file_ra_state.
  */
 unsigned long ra_submit(struct file_ra_state *ra,
-		       struct address_space *mapping, struct file *filp)
+			struct address_space *mapping,
+			struct file *filp,
+			pgoff_t offset,
+			unsigned long req_size)
 {
 	int actual;
 
@@ -473,6 +476,7 @@ ondemand_readahead(struct address_space 
 	 * start of file
 	 */
 	if (!offset) {
+		ra_set_pattern(ra, RA_PATTERN_INITIAL);
 		ra->start = offset;
 		ra->size = get_init_ra_size(req_size, max);
 		ra->async_size = ra->size > req_size ?
@@ -493,6 +497,7 @@ ondemand_readahead(struct address_space 
 	 */
 	if ((offset == (ra->start + ra->size - ra->async_size) ||
 	     offset == (ra->start + ra->size))) {
+		ra_set_pattern(ra, RA_PATTERN_SUBSEQUENT);
 		ra->start += ra->size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
@@ -503,6 +508,7 @@ ondemand_readahead(struct address_space 
 	 * oversize read, no need to query page cache
 	 */
 	if (req_size > max && !hit_readahead_marker) {
+		ra_set_pattern(ra, RA_PATTERN_INITIAL);
 		ra->start = offset;
 		ra->size = max;
 		ra->async_size = max;
@@ -548,8 +554,10 @@ context_readahead:
 	 */
 	if (!size && !hit_readahead_marker) {
 		if (!ra_thrashed(ra, offset)) {
+			ra_set_pattern(ra, RA_PATTERN_RANDOM);
 			ra->size = min(req_size, max);
 		} else {
+			ra_set_pattern(ra, RA_PATTERN_THRASH);
 			retain_inactive_pages(mapping, offset, min(2 * max,
 						ra->start + ra->size - offset));
 			ra->size = max_t(int, ra->size/2, MIN_READAHEAD_PAGES);
@@ -566,12 +574,13 @@ context_readahead:
 	if (size >= offset)
 		size *= 2;
 	/*
-	 * pages to readahead are already cached
+	 * Pages to readahead are already cached?
 	 */
 	if (size <= start - offset)
 		return 0;
-
 	size -= start - offset;
+
+	ra_set_pattern(ra, RA_PATTERN_CONTEXT);
 	ra->start = start;
 	ra->size = clamp_t(unsigned int, size, MIN_READAHEAD_PAGES, max);
 	ra->async_size = min(ra->size, 1 + size / READAHEAD_ASYNC_RATIO);
@@ -587,7 +596,7 @@ readit:
 		ra->size += ra->async_size;
 	}
 
-	return ra_submit(ra, mapping, filp);
+	return ra_submit(ra, mapping, filp, offset, req_size);
 }
 
 /**
--- linux.orig/include/linux/mm.h	2010-02-24 10:44:41.000000000 +0800
+++ linux/include/linux/mm.h	2010-02-24 10:44:45.000000000 +0800
@@ -1208,7 +1208,9 @@ void page_cache_async_readahead(struct a
 unsigned long max_sane_readahead(unsigned long nr);
 unsigned long ra_submit(struct file_ra_state *ra,
 			struct address_space *mapping,
-			struct file *filp);
+			struct file *filp,
+			pgoff_t offset,
+			unsigned long req_size);
 
 /* Do stack extension */
 extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
--- linux.orig/mm/filemap.c	2010-02-24 10:44:43.000000000 +0800
+++ linux/mm/filemap.c	2010-02-24 10:44:45.000000000 +0800
@@ -1413,6 +1413,7 @@ static void do_sync_mmap_readahead(struc
 
 	if (VM_SequentialReadHint(vma) ||
 			offset - 1 == (ra->prev_pos >> PAGE_CACHE_SHIFT)) {
+		ra->ra_flags |= READAHEAD_MMAP;
 		page_cache_sync_readahead(mapping, ra, file, offset,
 					  ra->ra_pages);
 		return;
@@ -1431,10 +1432,12 @@ static void do_sync_mmap_readahead(struc
 	 */
 	ra_pages = max_sane_readahead(ra->ra_pages);
 	if (ra_pages) {
+		ra->ra_flags |= READAHEAD_MMAP;
+		ra_set_pattern(ra, RA_PATTERN_MMAP_AROUND);
 		ra->start = max_t(long, 0, offset - ra_pages/2);
 		ra->size = ra_pages;
 		ra->async_size = 0;
-		ra_submit(ra, mapping, file);
+		ra_submit(ra, mapping, file, offset, 1);
 	}
 }
 
@@ -1454,9 +1457,11 @@ static void do_async_mmap_readahead(stru
 	if (VM_RandomReadHint(vma))
 		return;
 	ra_mmap_miss_dec(ra);
-	if (PageReadahead(page))
+	if (PageReadahead(page)) {
+		ra->ra_flags |= READAHEAD_MMAP;
 		page_cache_async_readahead(mapping, ra, file,
 					   page, offset, ra->ra_pages);
+	}
 }
 
 /**


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2010-02-24  3:10 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-24  3:10 [PATCH 00/15] 512K readahead size with thrashing safe readahead v2 Wu Fengguang
2010-02-24  3:10 ` [PATCH 01/15] readahead: limit readahead size for small devices Wu Fengguang
2010-02-25  3:11   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 02/15] readahead: retain inactive lru pages to be accessed soon Wu Fengguang
2010-02-25  3:17   ` Rik van Riel
2010-02-25 12:27     ` Wu Fengguang
2010-02-24  3:10 ` [PATCH 03/15] readahead: bump up the default readahead size Wu Fengguang
2010-02-25  4:02   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 04/15] readahead: make default readahead size a kernel parameter Wu Fengguang
2010-02-25 14:59   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 05/15] readahead: limit readahead size for small memory systems Wu Fengguang
2010-02-25 15:00   ` Rik van Riel
2010-02-25 15:25   ` Christian Ehrhardt
2010-02-26  2:29     ` Wu Fengguang
2010-02-26  2:48       ` [PATCH] readahead: add notes on readahead size Wu Fengguang
2010-02-26 14:17         ` Vivek Goyal
2010-02-26  7:23       ` [PATCH 05/15] readahead: limit readahead size for small memory systems Christian Ehrhardt
2010-02-26  7:38         ` Wu Fengguang
2010-02-24  3:10 ` [PATCH 06/15] readahead: replace ra->mmap_miss with ra->ra_flags Wu Fengguang
2010-02-25 15:52   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 07/15] readahead: thrashing safe context readahead Wu Fengguang
2010-02-25 16:24   ` Rik van Riel
2010-02-24  3:10 ` Wu Fengguang [this message]
2010-02-25 22:37   ` [PATCH 08/15] readahead: record readahead patterns Rik van Riel
2010-02-24  3:10 ` [PATCH 09/15] readahead: add tracing event Wu Fengguang
2010-02-25 22:38   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 10/15] readahead: add /debug/readahead/stats Wu Fengguang
2010-02-25 22:40   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 11/15] readahead: dont do start-of-file readahead after lseek() Wu Fengguang
2010-02-25 22:42   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 12/15] radixtree: introduce radix_tree_lookup_leaf_node() Wu Fengguang
2010-02-25 23:13   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 13/15] radixtree: speed up the search for hole Wu Fengguang
2010-02-25 23:37   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 14/15] readahead: reduce MMAP_LOTSAMISS for mmap read-around Wu Fengguang
2010-02-25 23:42   ` Rik van Riel
2010-02-24  3:10 ` [PATCH 15/15] readahead: pagecache context based " Wu Fengguang
2010-02-26  1:33   ` Rik van Riel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100224031054.734136802@intel.com \
    --to=fengguang.wu@intel.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=jens.axboe@oracle.com \
    --cc=mingo@elte.hu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).