All of lore.kernel.org
 help / color / mirror / Atom feed
From: Wu Fengguang <wfg@mail.ustc.edu.cn>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@osdl.org>, Wu Fengguang <wfg@mail.ustc.edu.cn>
Subject: [PATCH 06/16] readahead: parameters
Date: Sat, 03 Dec 2005 15:14:50 +0800	[thread overview]
Message-ID: <20051203071727.864172000@localhost.localdomain> (raw)
In-Reply-To: 20051203071444.260068000@localhost.localdomain

[-- Attachment #1: readahead-parameters.patch --]
[-- Type: text/plain, Size: 7184 bytes --]

- new sysctl entries in /proc/sys/vm:
	- readahead_ratio = 50
	- readahead_hit_rate = 2
	- readahead_live_chunk = 0
- dynamic minimal/initial read-ahead size.

For now, different ranges of readahead_ratio select different read-ahead
code path:

	condition			action
===================================================================
readahead_ratio == 0		disable read-ahead
readahead_ratio < 9		select old read-ahead logic
readahead_ratio >= 9		select new read-ahead logic

Signed-off-by: Wu Fengguang <wfg@mail.ustc.edu.cn>
---

 Documentation/sysctl/vm.txt |   46 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mm.h          |    2 -
 include/linux/sysctl.h      |    3 ++
 kernel/sysctl.c             |   34 ++++++++++++++++++++++++++++++++
 mm/readahead.c              |   39 +++++++++++++++++++++++++++++++++++++
 5 files changed, 123 insertions(+), 1 deletion(-)

--- linux.orig/Documentation/sysctl/vm.txt
+++ linux/Documentation/sysctl/vm.txt
@@ -27,6 +27,9 @@ Currently, these files are in /proc/sys/
 - laptop_mode
 - block_dump
 - swap_prefetch
+- readahead_ratio
+- readahead_hit_rate
+- readahead_live_chunk
 
 ==============================================================
 
@@ -114,3 +117,46 @@ except when laptop_mode is enabled and t
 Setting it to 0 disables prefetching entirely.
 
 The default value is dependant on ramsize.
+
+==============================================================
+
+readahead_ratio
+
+This limits read-ahead size to percent of the thrashing-threshold.
+The thrashing-threshold is dynamicly estimated according to the
+_history_ read speed and system load, and used to limit the
+_future_ read-ahead request size.
+
+Set it to a low value if you have not enough memory to counteract
+the I/O load fluctuations. But if there's plenty of memory, set it
+to a larger value might help increase read speed.
+
+The default value is 50.
+
+==============================================================
+
+readahead_hit_rate
+
+This is the max allowed value of (read-ahead-pages : accessed-pages).
+If the previous read-ahead request has bad hit rate, kernel will be
+very conservative to issue the next read-ahead.
+
+A large value helps speedup some sparse access patterns, at the cost
+of more memory consumption. It is recommended to keep the value below
+(max-readahead-pages / 8).
+
+The default value is 2.
+
+==============================================================
+
+readahead_live_chunk
+
+In a file server, there are typically one or more sequential
+readers working on a file. The kernel can detect most live
+chunks(a sequence of pages to be accessed by an active reader),
+and save them for their imminent readers.
+
+This parameter controls the max allowed chunk size, i.e. the max
+number of pages pinned for an active reader.
+
+The default value is 0(off). Increase it if you have enough memory.
--- linux.orig/include/linux/mm.h
+++ linux/include/linux/mm.h
@@ -991,7 +991,7 @@ extern int filemap_populate(struct vm_ar
 int write_one_page(struct page *page, int wait);
 
 /* readahead.c */
-#define VM_MAX_READAHEAD	128	/* kbytes */
+#define VM_MAX_READAHEAD	1024	/* kbytes */
 #define VM_MIN_READAHEAD	16	/* kbytes (includes current page) */
 #define VM_MAX_CACHE_HIT    	256	/* max pages in a row in cache before
 					 * turning readahead off */
--- linux.orig/include/linux/sysctl.h
+++ linux/include/linux/sysctl.h
@@ -182,6 +182,9 @@ enum
 	VM_LEGACY_VA_LAYOUT=27, /* legacy/compatibility virtual address space layout */
 	VM_SWAP_TOKEN_TIMEOUT=28, /* default time for token time out */
 	VM_SWAP_PREFETCH=29,	/* int: amount to swap prefetch */
+	VM_READAHEAD_RATIO=30, /* percent of read-ahead size to thrashing-threshold */
+	VM_READAHEAD_HIT_RATE=31, /* one accessed page legitimizes so many read-ahead pages */
+	VM_READAHEAD_LIVE_CHUNK=32, /* pin no more than that many pages for a live reader */
 };
 
 
--- linux.orig/kernel/sysctl.c
+++ linux/kernel/sysctl.c
@@ -68,6 +68,9 @@ extern int min_free_kbytes;
 extern int printk_ratelimit_jiffies;
 extern int printk_ratelimit_burst;
 extern int pid_max_min, pid_max_max;
+extern int readahead_ratio;
+extern int readahead_hit_rate;
+extern int readahead_live_chunk;
 
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 int unknown_nmi_panic;
@@ -668,6 +671,7 @@ static ctl_table kern_table[] = {
 /* Constants for minimum and maximum testing in vm_table.
    We use these as one-element integer vectors. */
 static int zero;
+static int one = 1;
 static int one_hundred = 100;
 
 
@@ -867,6 +871,36 @@ static ctl_table vm_table[] = {
 	},
 #endif
 #endif
+	{
+		.ctl_name	= VM_READAHEAD_RATIO,
+		.procname	= "readahead_ratio",
+		.data		= &readahead_ratio,
+		.maxlen		= sizeof(readahead_ratio),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
+	{
+		.ctl_name	= VM_READAHEAD_HIT_RATE,
+		.procname	= "readahead_hit_rate",
+		.data		= &readahead_hit_rate,
+		.maxlen		= sizeof(readahead_hit_rate),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &one,
+	},
+	{
+		.ctl_name	= VM_READAHEAD_LIVE_CHUNK,
+		.procname	= "readahead_live_chunk",
+		.data		= &readahead_live_chunk,
+		.maxlen		= sizeof(readahead_live_chunk),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &zero,
+	},
 	{ .ctl_name = 0 }
 };
 
--- linux.orig/mm/readahead.c
+++ linux/mm/readahead.c
@@ -20,6 +20,24 @@
 #define MAX_RA_PAGES	KB(VM_MAX_READAHEAD)
 #define MIN_RA_PAGES	KB(VM_MIN_READAHEAD)
 
+/* In laptop mode, poll delayed look-ahead on every ## pages read. */
+#define LAPTOP_POLL_INTERVAL 16
+
+/* Set look-ahead size to 1/# of the thrashing-threshold. */
+#define LOOKAHEAD_RATIO 8
+
+/* Set read-ahead size to ##% of the thrashing-threshold. */
+int readahead_ratio = 50;
+EXPORT_SYMBOL(readahead_ratio);
+
+/* Readahead as long as cache hit ratio keeps above 1/##. */
+int readahead_hit_rate = 2;
+EXPORT_SYMBOL(readahead_hit_rate);
+
+/* Scan backward ## pages to find a live reader. */
+int readahead_live_chunk = 0;
+EXPORT_SYMBOL(readahead_live_chunk);
+
 /* Detailed classification of read-ahead behaviors. */
 #define RA_CLASS_SHIFT 4
 #define RA_CLASS_MASK  ((1 << RA_CLASS_SHIFT) - 1)
@@ -796,6 +814,27 @@ out:
 }
 
 /*
+ * ra_size is mainly determined by:
+ * 1. sequential-start: min(MIN_RA_PAGES + (pages>>14), KB(128))
+ * 2. sequential-max:	min(ra->ra_pages, 0xFFFF)
+ * 3. sequential:	(thrashing-threshold) * readahead_ratio / 100
+ *
+ * Table of concrete numbers for 4KB page size:
+ *  (inactive + free) (in MB):    4   8   16   32   64  128  256  512 1024
+ *    initial ra_size (in KB):   16  16   16   16   20   24   32   48   64
+ */
+static inline void get_readahead_bounds(struct file_ra_state *ra,
+					unsigned long *ra_min,
+					unsigned long *ra_max)
+{
+	unsigned long pages;
+
+	pages = node_free_and_cold_pages();
+	*ra_max = min(min(pages/2, 0xFFFFUL), ra->ra_pages);
+	*ra_min = min(min(MIN_RA_PAGES + (pages>>14), KB(128)), *ra_max/2);
+}
+
+/*
  * This is the entry point of the adaptive read-ahead logic.
  *
  * It is only called on two conditions:

--

  parent reply	other threads:[~2005-12-03  7:11 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-12-03  7:14 [PATCH 00/16] Adaptive read-ahead V9 Wu Fengguang
2005-12-03  7:14 ` [PATCH 01/16] mm: delayed page activation Wu Fengguang
2005-12-04 12:11   ` Nikita Danilov
2005-12-04 13:48     ` Wu Fengguang
2005-12-04 15:03       ` Nikita Danilov
2005-12-04 15:37         ` Help!Unable to handle kernel NULL pointer tony
2005-12-04 19:10         ` [PATCH 01/16] mm: delayed page activation Peter Zijlstra
2005-12-05  1:48         ` Wu Fengguang
2005-12-06 17:55           ` Nikita Danilov
2005-12-07  1:42             ` Wu Fengguang
2005-12-07  9:46               ` Andrew Morton
2005-12-07 10:36                 ` Wu Fengguang
2005-12-07 12:44               ` Nikita Danilov
2005-12-07 13:53                 ` Wu Fengguang
2005-12-03  7:14 ` [PATCH 02/16] radixtree: sync with mainline Wu Fengguang
2005-12-04 23:57   ` Andrew Morton
2005-12-05  1:43     ` Wu Fengguang
2005-12-05  4:05     ` Wu Fengguang
2005-12-05 17:22       ` Christoph Lameter
2005-12-05 10:44     ` Wu Fengguang
2005-12-05 17:24       ` Christoph Lameter
2005-12-06  2:23         ` Wu Fengguang
2005-12-03  7:14 ` [PATCH 03/16] radixtree: look-aside cache Wu Fengguang
2005-12-03  7:14 ` [PATCH 04/16] readahead: some preparation Wu Fengguang
2005-12-03  7:14 ` [PATCH 05/16] readahead: call scheme Wu Fengguang
2005-12-03  7:14 ` Wu Fengguang [this message]
2005-12-03  7:14 ` [PATCH 07/16] readahead: state based method Wu Fengguang
2005-12-03  7:14 ` [PATCH 08/16] readahead: context " Wu Fengguang
2005-12-03  7:14 ` [PATCH 09/16] readahead: read-around method for mmap file Wu Fengguang
2005-12-03  7:14 ` [PATCH 10/16] readahead: other methods Wu Fengguang
2005-12-03  7:14 ` [PATCH 11/16] readahead: detect and rescue live pages Wu Fengguang
2005-12-03  7:14 ` [PATCH 12/16] readahead: events accounting Wu Fengguang
2005-12-03  7:14 ` [PATCH 13/16] readahead: laptop mode support Wu Fengguang
2005-12-03  7:14 ` [PATCH 14/16] readahead: disable look-ahead for loopback file Wu Fengguang
2005-12-03  7:14 ` [PATCH 15/16] readahead: nfsd support Wu Fengguang
2005-12-03  7:15 ` [PATCH 16/16] io: prevent too much latency in the read-ahead code Wu Fengguang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20051203071727.864172000@localhost.localdomain \
    --to=wfg@mail.ustc.edu.cn \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.