public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Fengguang Wu <wfg@mail.ustc.edu.cn>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	linux-kernel@vger.kernel.org, Nick Piggin <npiggin@suse.de>
Subject: Re: [PATCH 0/9] mmap read-around and readahead
Date: Tue, 18 Dec 2007 20:13:22 +0800	[thread overview]
Message-ID: <397980013.15006@ustc.edu.cn> (raw)
Message-ID: <E1J4bK6-0001BG-Mp@localhost> (raw)
In-Reply-To: <20071218114609.GA27778@mail.ustc.edu.cn>

On Tue, Dec 18, 2007 at 07:46:09PM +0800, Fengguang Wu wrote:
> No timings for now... but I wrote a debug patch(attached) and watched
> it running for about a week.  Here are some interesting numbers:

Here is the (forgotten) readahead-debug.patch:

---
 include/linux/fs.h |   43 ++++++++++++++++++++++++++++++++++
 mm/Kconfig         |   19 +++++++++++++++
 mm/filemap.c       |    1 
 mm/readahead.c     |   54 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 116 insertions(+), 1 deletion(-)

--- linux-2.6.24-rc4-mm1.orig/include/linux/fs.h
+++ linux-2.6.24-rc4-mm1/include/linux/fs.h
@@ -760,11 +760,54 @@ struct file_ra_state {
 	unsigned int async_size;	/* do asynchronous readahead when
 					   there are only # of pages ahead */
 
+	unsigned int flags;
 	unsigned int ra_pages;		/* Maximum readahead window */
 	int mmap_miss;			/* Cache miss stat for mmap accesses */
 	loff_t prev_pos;		/* Cache last read() position */
 };
 
+#define RA_CLASS_SHIFT		4
+#define RA_CLASS_MASK		((1 << RA_CLASS_SHIFT) - 1)
+/*
+ * Detailed classification of read-ahead behaviors, kept in ra->flags.
+ */
+enum ra_class {
+	RA_CLASS_INIT0,		/* initial window, offset == 0 */
+	RA_CLASS_INIT,		/* initial window, offset != 0 */
+	RA_CLASS_SEQUENTIAL,	/* window pushed forward by ra->size */
+	RA_CLASS_INTERLEAVED,	/* size derived from start - offset */
+	RA_CLASS_CONTEXT,	/* never set by this patch */
+	RA_CLASS_AROUND,	/* set in do_sync_mmap_readahead() */
+	RA_CLASS_COUNT		/* number of classes above */
+};
+
+static inline enum ra_class ra_class_new(struct file_ra_state *ra)
+{
+	return ra->flags & RA_CLASS_MASK;	/* current class: low bits */
+}
+
+static inline enum ra_class ra_class_old(struct file_ra_state *ra)
+{
+	return (ra->flags >> RA_CLASS_SHIFT) & RA_CLASS_MASK; /* previous class */
+}
+
+/*
+ * Set the current class to @ra_class, saving the previous one as "old".
+ */
+static inline void ra_set_class(struct file_ra_state *ra, enum ra_class ra_class)
+{
+	unsigned long flags_mask;
+	unsigned long flags;
+	unsigned long old_ra_class;
+
+	flags_mask = ~(RA_CLASS_MASK | (RA_CLASS_MASK << RA_CLASS_SHIFT));
+	flags = ra->flags & flags_mask;
+	/* the class that was current moves up into the "old" field */
+	old_ra_class = ra_class_new(ra) << RA_CLASS_SHIFT;
+	/* non-class bits | old class | new class (@ra_class is not masked) */
+	ra->flags = flags | old_ra_class | ra_class;
+}
+
 /*
  * Check if @index falls in the readahead windows.
  */
--- linux-2.6.24-rc4-mm1.orig/mm/Kconfig
+++ linux-2.6.24-rc4-mm1/mm/Kconfig
@@ -194,3 +194,22 @@ config NR_QUICK
 config VIRT_TO_BUS
 	def_bool y
 	depends on !ARCH_NO_VIRT_TO_BUS
+
+config DEBUG_READAHEAD
+	bool "Readahead debug and accounting"
+	default y
+	select DEBUG_FS
+	help
+	  This option injects extra code to dump detailed debug traces and do
+	  readahead events accounting.
+
+	  To actually get the data:
+
+	  mkdir /debug
+	  mount -t debugfs none /debug
+
+	  After that you can do the following:
+
+	  echo > /debug/readahead/events # reset the counters
+	  cat /debug/readahead/events    # check the counters
+
--- linux-2.6.24-rc4-mm1.orig/mm/readahead.c
+++ linux-2.6.24-rc4-mm1/mm/readahead.c
@@ -16,6 +16,29 @@
 #include <linux/task_io_accounting_ops.h>
 #include <linux/pagevec.h>
 #include <linux/pagemap.h>
+#include <linux/debugfs.h>
+
+static const char * const ra_class_name[] = {	/* indexed by enum ra_class */
+	[RA_CLASS_INIT0]	= "init0",
+	[RA_CLASS_INIT]		= "init",
+	[RA_CLASS_SEQUENTIAL]	= "sequential",
+	[RA_CLASS_INTERLEAVED]	= "interleaved",
+	[RA_CLASS_CONTEXT]	= "context",
+	[RA_CLASS_AROUND]	= "around",
+};
+
+#ifdef CONFIG_DEBUG_READAHEAD
+static u32 readahead_debug_level = 1;	/* dprintk: >= 2, ddprintk: >= 3 */
+#  define debug_option(o)		(o)
+#else
+#  define debug_option(o)		(0)
+#  define readahead_debug_level 	(0)
+#endif /* CONFIG_DEBUG_READAHEAD */
+
+#define dprintk(args...) \
+	do { if (readahead_debug_level >= 2) printk(KERN_DEBUG args); } while(0)
+#define ddprintk(args...) \
+	do { if (readahead_debug_level >= 3) printk(KERN_DEBUG args); } while(0)
 
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
@@ -220,6 +243,13 @@ unsigned long max_sane_readahead(unsigne
 
 static int __init readahead_init(void)
 {
+#ifdef CONFIG_DEBUG_READAHEAD
+	struct dentry *root = debugfs_create_dir("readahead", NULL);
+
+	if (root)	/* a NULL parent would mean the debugfs root */
+		debugfs_create_u32("debug_level", 0644, root,
+				   &readahead_debug_level);
+#endif
 	return bdi_init(&default_backing_dev_info);
 }
 subsys_initcall(readahead_init);
@@ -235,6 +265,15 @@ unsigned long ra_submit(struct file_ra_s
 	actual = __do_page_cache_readahead(mapping, filp,
 					ra->start, ra->size, ra->async_size);
 
+	dprintk("readahead-%s(process: %s/%d, file: %s/%s, "
+			"offset=%ld:%ld, ra=%ld+%d-%d) = %d\n",
+			ra_class_name[ra_class_new(ra)],
+			current->comm, current->pid,
+			mapping->host->i_sb->s_id,
+			filp->f_path.dentry->d_iname,
+			(long)(filp->f_pos >> PAGE_CACHE_SHIFT),
+			(long)(ra->prev_pos >> PAGE_CACHE_SHIFT),
+			ra->start, ra->size, ra->async_size, actual);
 	return actual;
 }
 
@@ -337,6 +376,7 @@ ondemand_readahead(struct address_space 
 		ra->start += ra->size;
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
+		ra_set_class(ra, RA_CLASS_SEQUENTIAL);
 		goto readit;
 	}
 
@@ -348,8 +388,15 @@ ondemand_readahead(struct address_space 
 	 * Read as is, and do not pollute the readahead state.
 	 */
 	if (!hit_readahead_marker && !sequential) {
-		return __do_page_cache_readahead(mapping, filp,
+		int actual = __do_page_cache_readahead(mapping, filp,
 						offset, req_size, 0);
+		dprintk("read-random(process: %s/%d, file: %s/%s, "
+			"req=%ld+%ld) = %d\n",
+				current->comm, current->pid,
+				mapping->host->i_sb->s_id,
+				filp->f_path.dentry->d_iname,
+				offset, req_size, actual);
+		return actual;
 	}
 
 	/*
@@ -372,6 +419,7 @@ ondemand_readahead(struct address_space 
 		ra->size = start - offset;	/* old async_size */
 		ra->size = get_next_ra_size(ra, max);
 		ra->async_size = ra->size;
+		ra_set_class(ra, RA_CLASS_INTERLEAVED);
 		goto readit;
 	}
 
@@ -385,6 +433,10 @@ ondemand_readahead(struct address_space 
 	ra->start = offset;
 	ra->size = get_init_ra_size(req_size, max);
 	ra->async_size = ra->size > req_size ? ra->size - req_size : ra->size;
+	if (offset)
+		ra_set_class(ra, RA_CLASS_INIT);
+	else
+		ra_set_class(ra, RA_CLASS_INIT0);
 
 readit:
 	/*
--- linux-2.6.24-rc4-mm1.orig/mm/filemap.c
+++ linux-2.6.24-rc4-mm1/mm/filemap.c
@@ -1340,6 +1340,7 @@ static void do_sync_mmap_readahead(struc
 		ra->start = max_t(long, 0, offset - ra_pages / 2);
 		ra->size = ra_pages;
 		ra->async_size = 0;
+		ra_set_class(ra, RA_CLASS_AROUND);
 		ra_submit(ra, mapping, file);
 	}
 }


  parent reply	other threads:[~2007-12-18 12:13 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20071216115927.986126305@mail.ustc.edu.cn>
2007-12-16 11:59 ` [PATCH 0/9] mmap read-around and readahead Fengguang Wu
2007-12-16 23:35   ` Linus Torvalds
     [not found]     ` <E1J4atl-0000UI-4a@localhost>
2007-12-18 11:46       ` Fengguang Wu
     [not found]     ` <20071218114609.GA27778@mail.ustc.edu.cn>
     [not found]       ` <E1J4bK6-0001BG-Mp@localhost>
2007-12-18 12:13         ` Fengguang Wu [this message]
     [not found]     ` <E1J4tUN-0002IB-F6@localhost>
2007-12-19  7:37       ` Fengguang Wu
     [not found] ` <20071216120417.586021813@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 1/9] readahead: simplify readahead call scheme Fengguang Wu
     [not found] ` <20071216120417.748714367@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 2/9] readahead: clean up and simplify the code for filemap page fault readahead Fengguang Wu
2007-12-18  8:19   ` Nick Piggin
     [not found]     ` <E1J4ay1-0000ak-3e@localhost>
2007-12-18 11:50       ` Fengguang Wu
2007-12-18 23:54       ` Nick Piggin
     [not found]         ` <E1J4spp-0001pR-JQ@localhost>
2007-12-19  6:55           ` Fengguang Wu
     [not found] ` <20071216120417.905514988@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 3/9] readahead: auto detection of sequential mmap reads Fengguang Wu
     [not found] ` <20071216120418.055796608@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 4/9] readahead: quick startup on sequential mmap readahead Fengguang Wu
     [not found] ` <20071216120418.201213716@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 5/9] readahead: make ra_submit() non-static Fengguang Wu
     [not found] ` <20071216120418.360445517@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 6/9] readahead: save mmap read-around states in file_ra_state Fengguang Wu
     [not found] ` <20071216120418.498110756@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 7/9] readahead: remove unused do_page_cache_readahead() Fengguang Wu
     [not found] ` <20071216120418.639781633@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 8/9] readahead: move max_sane_readahead() calls into force_page_cache_readahead() Fengguang Wu
     [not found] ` <20071216120418.787765636@mail.ustc.edu.cn>
2007-12-16 11:59   ` [PATCH 9/9] readahead: call max_sane_readahead() in ondemand_readahead() Fengguang Wu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=397980013.15006@ustc.edu.cn \
    --to=wfg@mail.ustc.edu.cn \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=npiggin@suse.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox