From: Wu Fengguang <fengguang.wu@intel.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Jens Axboe <jens.axboe@oracle.com>, Ingo Molnar <mingo@elte.hu>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Wu Fengguang <fengguang.wu@intel.com>,
Chris Mason <chris.mason@oracle.com>,
Clemens Ladisch <clemens@ladisch.de>,
Olivier Galibert <galibert@pobox.com>,
Vivek Goyal <vgoyal@redhat.com>,
Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>,
Matt Mackall <mpm@selenic.com>, Nick Piggin <npiggin@suse.de>,
Linux Memory Management List <linux-mm@kvack.org>,
linux-fsdevel@vger.kernel.org,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH 10/15] readahead: add /debug/readahead/stats
Date: Wed, 24 Feb 2010 11:10:11 +0800 [thread overview]
Message-ID: <20100224031055.024165020@intel.com> (raw)
In-Reply-To: 20100224031001.026464755@intel.com
[-- Attachment #1: readahead-stats.patch --]
[-- Type: text/plain, Size: 8554 bytes --]
Collect readahead stats when CONFIG_READAHEAD_STATS=y.
This is enabled by default because the added overhead is trivial:
two readahead_stats() calls per readahead.
Example output:
(taken from a freshly booted NFS-ROOT box with rsize=16k)
$ cat /debug/readahead/stats
pattern readahead eof_hit cache_hit io sync_io mmap_io size async_size io_size
initial 524 216 26 498 498 18 7 4 4
subsequent 181 80 1 130 13 60 25 25 24
context 94 28 3 85 64 8 7 2 5
thrash 0 0 0 0 0 0 0 0 0
around 162 121 33 162 162 162 60 0 21
fadvise 0 0 0 0 0 0 0 0 0
random 137 0 0 137 137 0 1 0 1
all 1098 445 63 1012 874 0 17 6 9
The two most important columns are
- io		the number of readahead IO requests
- io_size	the average readahead IO size
CC: Ingo Molnar <mingo@elte.hu>
CC: Jens Axboe <jens.axboe@oracle.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
---
mm/Kconfig | 13 +++
mm/readahead.c | 187 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 198 insertions(+), 2 deletions(-)
--- linux.orig/mm/readahead.c 2010-02-24 10:44:46.000000000 +0800
+++ linux/mm/readahead.c 2010-02-24 10:44:47.000000000 +0800
@@ -89,6 +89,189 @@ EXPORT_SYMBOL_GPL(file_ra_state_init);
#define list_to_page(head) (list_entry((head)->prev, struct page, lru))
+#ifdef CONFIG_READAHEAD_STATS
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+enum ra_account {
+ /* number of readaheads */
+ RA_ACCOUNT_COUNT, /* readahead request */
+ RA_ACCOUNT_EOF, /* readahead request contains/beyond EOF page */
+ RA_ACCOUNT_CHIT, /* readahead request covers some cached pages */
+ RA_ACCOUNT_IOCOUNT, /* readahead IO */
+ RA_ACCOUNT_SYNC, /* readahead IO that is synchronous */
+ RA_ACCOUNT_MMAP, /* readahead IO by mmap accesses */
+ /* number of readahead pages */
+ RA_ACCOUNT_SIZE, /* readahead size */
+ RA_ACCOUNT_ASIZE, /* readahead async size */
+ RA_ACCOUNT_ACTUAL, /* readahead actual IO size */
+ /* end mark */
+ RA_ACCOUNT_MAX,
+};
+
+static unsigned long ra_stats[RA_PATTERN_MAX][RA_ACCOUNT_MAX];
+
+static void readahead_stats(struct address_space *mapping,
+ pgoff_t offset,
+ unsigned long req_size,
+ unsigned int ra_flags,
+ pgoff_t start,
+ unsigned int size,
+ unsigned int async_size,
+ int actual)
+{
+ unsigned int pattern = ra_pattern(ra_flags);
+
+ ra_stats[pattern][RA_ACCOUNT_COUNT]++;
+ ra_stats[pattern][RA_ACCOUNT_SIZE] += size;
+ ra_stats[pattern][RA_ACCOUNT_ASIZE] += async_size;
+ ra_stats[pattern][RA_ACCOUNT_ACTUAL] += actual;
+
+ if (actual < size) {
+ if (start + size >
+ (i_size_read(mapping->host) - 1) >> PAGE_CACHE_SHIFT)
+ ra_stats[pattern][RA_ACCOUNT_EOF]++;
+ else
+ ra_stats[pattern][RA_ACCOUNT_CHIT]++;
+ }
+
+ if (!actual)
+ return;
+
+ ra_stats[pattern][RA_ACCOUNT_IOCOUNT]++;
+
+ if (start <= offset && start + size > offset)
+ ra_stats[pattern][RA_ACCOUNT_SYNC]++;
+
+ if (ra_flags & READAHEAD_MMAP)
+ ra_stats[pattern][RA_ACCOUNT_MMAP]++;
+}
+
+static int readahead_stats_show(struct seq_file *s, void *_)
+{
+ static const char * const ra_pattern_names[] = {
+ [RA_PATTERN_INITIAL] = "initial",
+ [RA_PATTERN_SUBSEQUENT] = "subsequent",
+ [RA_PATTERN_CONTEXT] = "context",
+ [RA_PATTERN_THRASH] = "thrash",
+ [RA_PATTERN_MMAP_AROUND] = "around",
+ [RA_PATTERN_FADVISE] = "fadvise",
+ [RA_PATTERN_RANDOM] = "random",
+ [RA_PATTERN_ALL] = "all",
+ };
+ unsigned long count, iocount;
+ unsigned long i;
+
+ seq_printf(s, "%-10s %10s %10s %10s %10s %10s %10s %10s %10s %10s\n",
+ "pattern",
+ "readahead", "eof_hit", "cache_hit",
+ "io", "sync_io", "mmap_io",
+ "size", "async_size", "io_size");
+
+ for (i = 0; i < RA_PATTERN_MAX; i++) {
+ count = ra_stats[i][RA_ACCOUNT_COUNT];
+ iocount = ra_stats[i][RA_ACCOUNT_IOCOUNT];
+ /*
+ * avoid division-by-zero
+ */
+ if (count == 0)
+ count = 1;
+ if (iocount == 0)
+ iocount = 1;
+
+ seq_printf(s, "%-10s %10lu %10lu %10lu %10lu %10lu %10lu "
+ "%10lu %10lu %10lu\n",
+ ra_pattern_names[i],
+ ra_stats[i][RA_ACCOUNT_COUNT],
+ ra_stats[i][RA_ACCOUNT_EOF],
+ ra_stats[i][RA_ACCOUNT_CHIT],
+ ra_stats[i][RA_ACCOUNT_IOCOUNT],
+ ra_stats[i][RA_ACCOUNT_SYNC],
+ ra_stats[i][RA_ACCOUNT_MMAP],
+ ra_stats[i][RA_ACCOUNT_SIZE] / count,
+ ra_stats[i][RA_ACCOUNT_ASIZE] / count,
+ ra_stats[i][RA_ACCOUNT_ACTUAL] / iocount);
+ }
+
+ return 0;
+}
+
+static int readahead_stats_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, readahead_stats_show, NULL);
+}
+
+static ssize_t readahead_stats_write(struct file *file, const char __user *buf,
+ size_t size, loff_t *offset)
+{
+ memset(ra_stats, 0, sizeof(ra_stats));
+ return size;
+}
+
+static struct file_operations readahead_stats_fops = {
+ .owner = THIS_MODULE,
+ .open = readahead_stats_open,
+ .write = readahead_stats_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static struct dentry *ra_debug_root;
+
+static int debugfs_create_readahead(void)
+{
+ struct dentry *debugfs_stats;
+
+ ra_debug_root = debugfs_create_dir("readahead", NULL);
+ if (!ra_debug_root)
+ goto out;
+
+ debugfs_stats = debugfs_create_file("stats", 0644, ra_debug_root,
+ NULL, &readahead_stats_fops);
+ if (!debugfs_stats)
+ goto out;
+
+ return 0;
+out:
+ printk(KERN_ERR "readahead: failed to create debugfs entries\n");
+ return -ENOMEM;
+}
+
+static int __init readahead_init(void)
+{
+ debugfs_create_readahead();
+ return 0;
+}
+
+static void __exit readahead_exit(void)
+{
+ debugfs_remove_recursive(ra_debug_root);
+}
+
+module_init(readahead_init);
+module_exit(readahead_exit);
+#endif
+
+static void readahead_event(struct address_space *mapping,
+ pgoff_t offset,
+ unsigned long req_size,
+ unsigned int ra_flags,
+ pgoff_t start,
+ unsigned int size,
+ unsigned int async_size,
+ unsigned int actual)
+{
+#ifdef CONFIG_READAHEAD_STATS
+ readahead_stats(mapping, offset, req_size, ra_flags,
+ start, size, async_size, actual);
+ readahead_stats(mapping, offset, req_size,
+ RA_PATTERN_ALL << READAHEAD_PATTERN_SHIFT,
+ start, size, async_size, actual);
+#endif
+ trace_readahead(mapping, offset, req_size, ra_flags,
+ start, size, async_size, actual);
+}
+
/*
* see if a page needs releasing upon read_cache_pages() failure
* - the caller of read_cache_pages() may have set PG_private or PG_fscache
@@ -326,7 +509,7 @@ int force_page_cache_readahead(struct ad
nr_to_read -= this_chunk;
}
- trace_readahead(mapping, offset, nr_to_read,
+ readahead_event(mapping, offset, nr_to_read,
RA_PATTERN_FADVISE << READAHEAD_PATTERN_SHIFT,
offset, nr_to_read, 0, ret);
@@ -357,7 +540,7 @@ unsigned long ra_submit(struct file_ra_s
actual = __do_page_cache_readahead(mapping, filp,
ra->start, ra->size, ra->async_size);
- trace_readahead(mapping, offset, req_size, ra->ra_flags,
+ readahead_event(mapping, offset, req_size, ra->ra_flags,
ra->start, ra->size, ra->async_size, actual);
return actual;
--- linux.orig/mm/Kconfig 2010-02-24 10:44:23.000000000 +0800
+++ linux/mm/Kconfig 2010-02-24 10:44:47.000000000 +0800
@@ -283,3 +283,16 @@ config NOMMU_INITIAL_TRIM_EXCESS
of 1 says that all excess pages should be trimmed.
See Documentation/nommu-mmap.txt for more information.
+
+config READAHEAD_STATS
+ bool "Collect page-cache readahead stats"
+ depends on DEBUG_FS
+ default y
+ help
+ Enable readahead events accounting. Usage:
+
+ # mount -t debugfs none /debug
+
+ # echo > /debug/readahead/stats # reset counters
+ # do benchmarks
+ # cat /debug/readahead/stats # check counters
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>
next prev parent reply other threads:[~2010-02-24 3:12 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-24 3:10 [PATCH 00/15] 512K readahead size with thrashing safe readahead v2 Wu Fengguang
2010-02-24 3:10 ` [PATCH 01/15] readahead: limit readahead size for small devices Wu Fengguang
2010-02-25 3:11 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 02/15] readahead: retain inactive lru pages to be accessed soon Wu Fengguang
2010-02-25 3:17 ` Rik van Riel
2010-02-25 12:27 ` Wu Fengguang
2010-02-24 3:10 ` [PATCH 03/15] readahead: bump up the default readahead size Wu Fengguang
2010-02-25 4:02 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 04/15] readahead: make default readahead size a kernel parameter Wu Fengguang
2010-02-25 14:59 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 05/15] readahead: limit readahead size for small memory systems Wu Fengguang
2010-02-25 15:00 ` Rik van Riel
2010-02-25 15:25 ` Christian Ehrhardt
2010-02-26 2:29 ` Wu Fengguang
2010-02-26 2:48 ` [PATCH] readahead: add notes on readahead size Wu Fengguang
2010-02-26 14:17 ` Vivek Goyal
2010-02-26 7:23 ` [PATCH 05/15] readahead: limit readahead size for small memory systems Christian Ehrhardt
2010-02-26 7:38 ` Wu Fengguang
2010-02-24 3:10 ` [PATCH 06/15] readahead: replace ra->mmap_miss with ra->ra_flags Wu Fengguang
2010-02-25 15:52 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 07/15] readahead: thrashing safe context readahead Wu Fengguang
2010-02-25 16:24 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 08/15] readahead: record readahead patterns Wu Fengguang
2010-02-25 22:37 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 09/15] readahead: add tracing event Wu Fengguang
2010-02-25 22:38 ` Rik van Riel
2010-02-24 3:10 ` Wu Fengguang [this message]
2010-02-25 22:40 ` [PATCH 10/15] readahead: add /debug/readahead/stats Rik van Riel
2010-02-24 3:10 ` [PATCH 11/15] readahead: dont do start-of-file readahead after lseek() Wu Fengguang
2010-02-25 22:42 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 12/15] radixtree: introduce radix_tree_lookup_leaf_node() Wu Fengguang
2010-02-25 23:13 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 13/15] radixtree: speed up the search for hole Wu Fengguang
2010-02-25 23:37 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 14/15] readahead: reduce MMAP_LOTSAMISS for mmap read-around Wu Fengguang
2010-02-25 23:42 ` Rik van Riel
2010-02-24 3:10 ` [PATCH 15/15] readahead: pagecache context based " Wu Fengguang
2010-02-26 1:33 ` Rik van Riel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100224031055.024165020@intel.com \
--to=fengguang.wu@intel.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=chris.mason@oracle.com \
--cc=clemens@ladisch.de \
--cc=ehrhardt@linux.vnet.ibm.com \
--cc=galibert@pobox.com \
--cc=jens.axboe@oracle.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mingo@elte.hu \
--cc=mpm@selenic.com \
--cc=npiggin@suse.de \
--cc=vgoyal@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).