From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-kernel <linux-kernel@vger.kernel.org>
Cc: Fengguang Wu <wfg@mail.ustc.edu.cn>, riel <riel@redhat.com>,
Andrew Morton <akpm@linux-foundation.org>,
Rusty Russell <rusty@rustcorp.com.au>,
Tim Pepper <lnxninja@us.ibm.com>, Chris Snook <csnook@redhat.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>
Subject: [PATCH 1/3] readahead: drop behind
Date: Sat, 21 Jul 2007 23:00:06 +0200 [thread overview]
Message-ID: <20070721210051.975382000@chello.nl> (raw)
In-Reply-To: 20070721210005.000228000@chello.nl
[-- Attachment #1: readahead-useonce.patch --]
[-- Type: text/plain, Size: 5776 bytes --]
Use the read-ahead code to provide hints to page reclaim.
This patch has the potential to solve the "streaming IO trashes my
desktop" problem.
It tries to aggressively reclaim pages that were loaded in a strong
sequential pattern and have been consumed, thereby limiting the damage
to the current resident set.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
include/linux/swap.h | 1 +
mm/readahead.c | 39 ++++++++++++++++++++++++++++++++++++++-
mm/swap.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 90 insertions(+), 1 deletion(-)
Index: linux-2.6/mm/swap.c
===================================================================
--- linux-2.6.orig/mm/swap.c
+++ linux-2.6/mm/swap.c
@@ -30,6 +30,7 @@
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/init.h>
+#include <linux/rmap.h>
/* How many pages do we try to swap or page in/out together? */
int page_cluster;
@@ -176,6 +177,7 @@ EXPORT_SYMBOL(mark_page_accessed);
*/
static DEFINE_PER_CPU(struct pagevec, lru_add_pvecs) = { 0, };
static DEFINE_PER_CPU(struct pagevec, lru_add_active_pvecs) = { 0, };
+static DEFINE_PER_CPU(struct pagevec, lru_demote_pvecs) = { 0, };
void fastcall lru_cache_add(struct page *page)
{
@@ -197,6 +199,37 @@ void fastcall lru_cache_add_active(struc
put_cpu_var(lru_add_active_pvecs);
}
+static void __pagevec_lru_demote(struct pagevec *pvec) /* move each LRU page in @pvec to the tail of its zone's inactive list */
+{
+ int i;
+ struct zone *zone = NULL; /* zone whose lru_lock is currently held; NULL = none */
+
+ for (i = 0; i < pagevec_count(pvec); i++) {
+ struct page *page = pvec->pages[i];
+ struct zone *pagezone = page_zone(page);
+
+ if (pagezone != zone) { /* zone changed: drop the old lru_lock, take the new one */
+ if (zone)
+ spin_unlock_irq(&zone->lru_lock);
+ zone = pagezone;
+ spin_lock_irq(&zone->lru_lock);
+ }
+ if (PageLRU(page)) { /* pages without PageLRU set are left where they are */
+ page_referenced(page, 0); /* result ignored; NOTE(review): presumably run for its side effect of clearing referenced state, and it runs under lru_lock — confirm both */
+ if (PageActive(page)) {
+ ClearPageActive(page);
+ __dec_zone_state(zone, NR_ACTIVE); /* keep the zone counters in step with the flag change */
+ __inc_zone_state(zone, NR_INACTIVE);
+ }
+ list_move_tail(&page->lru, &zone->inactive_list);
+ }
+ }
+ if (zone)
+ spin_unlock_irq(&zone->lru_lock);
+ release_pages(pvec->pages, pvec->nr, pvec->cold); /* presumably drops the reference lru_demote() took per page — confirm */
+ pagevec_reinit(pvec);
+}
+
static void __lru_add_drain(int cpu)
{
struct pagevec *pvec = &per_cpu(lru_add_pvecs, cpu);
@@ -207,6 +240,9 @@ static void __lru_add_drain(int cpu)
pvec = &per_cpu(lru_add_active_pvecs, cpu);
if (pagevec_count(pvec))
__pagevec_lru_add_active(pvec);
+ pvec = &per_cpu(lru_demote_pvecs, cpu);
+ if (pagevec_count(pvec))
+ __pagevec_lru_demote(pvec);
}
void lru_add_drain(void)
@@ -403,6 +439,21 @@ void __pagevec_lru_add_active(struct pag
}
/*
+ * Function used to forcefully demote a page to the tail of the inactive
+ * list.
+ */
+void fastcall lru_demote(struct page *page) /* queue @page on this CPU's demote pagevec */
+{
+ if (likely(get_page_unless_zero(page))) { /* do nothing unless a reference could be taken */
+ struct pagevec *pvec = &get_cpu_var(lru_demote_pvecs);
+
+ if (!pagevec_add(pvec, page)) /* NOTE(review): presumably 0 means the pagevec is now full — confirm */
+ __pagevec_lru_demote(pvec);
+ put_cpu_var(lru_demote_pvecs);
+ }
+}
+
+/*
* Try to drop buffers from the pages in a pagevec
*/
void pagevec_strip(struct pagevec *pvec)
Index: linux-2.6/include/linux/swap.h
===================================================================
--- linux-2.6.orig/include/linux/swap.h
+++ linux-2.6/include/linux/swap.h
@@ -180,6 +180,7 @@ extern unsigned int nr_free_pagecache_pa
/* linux/mm/swap.c */
extern void FASTCALL(lru_cache_add(struct page *));
extern void FASTCALL(lru_cache_add_active(struct page *));
+extern void FASTCALL(lru_demote(struct page *));
extern void FASTCALL(activate_page(struct page *));
extern void FASTCALL(mark_page_accessed(struct page *));
extern void lru_add_drain(void);
Index: linux-2.6/mm/readahead.c
===================================================================
--- linux-2.6.orig/mm/readahead.c
+++ linux-2.6/mm/readahead.c
@@ -15,6 +15,7 @@
#include <linux/backing-dev.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/pagevec.h>
+#include <linux/swap.h>
void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
@@ -448,13 +449,19 @@ EXPORT_SYMBOL_GPL(page_cache_sync_readah
* page_cache_async_ondemand() should be called when a page is used which
* has the PG_readahead flag: this is a marker to suggest that the application
* has used up enough of the readahead window that we should start pulling in
- * more pages. */
+ * more pages.
+ */
void
page_cache_async_readahead(struct address_space *mapping,
struct file_ra_state *ra, struct file *filp,
struct page *page, pgoff_t offset,
unsigned long req_size)
{
+ pgoff_t demote_idx = offset - min_t(pgoff_t, offset, ra->size);
+ struct page *pages[16];
+ unsigned nr_pages;
+ unsigned i;
+
/* no read-ahead */
if (!ra->ra_pages)
return;
@@ -473,6 +480,36 @@ page_cache_async_readahead(struct addres
if (bdi_read_congested(mapping->backing_dev_info))
return;
+ /*
+ * Read-ahead use once: when the ra window is maximal this is a good
+ * hint that there is sequential IO, which implies that the pages that
+ * have been used thus far can be reclaimed
+ */
+ if (ra->size == ra->ra_pages) do {
+ nr_pages = find_get_pages(mapping,
+ demote_idx, ARRAY_SIZE(pages), pages);
+
+ for (i = 0; i < nr_pages; i++) {
+ page = pages[i];
+ demote_idx = page_index(page);
+
+ /*
+ * The page is active. This means there are other
+ * users. We should not take away somebody else's
+ * pages, so do not drop behind beyond this point.
+ */
+ if (demote_idx < offset && !PageActive(page)) {
+ lru_demote(page);
+ } else {
+ demote_idx = offset;
+ break;
+ }
+ }
+ demote_idx++;
+
+ release_pages(pages, nr_pages, 0);
+ } while (demote_idx < offset);
+
/* do read-ahead */
ondemand_readahead(mapping, ra, filp, true, offset, req_size);
}
--
next prev parent reply other threads:[~2007-07-21 19:02 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-07-21 21:00 [PATCH 0/3] readahead drop behind and size adjustment Peter Zijlstra
2007-07-21 21:00 ` Peter Zijlstra [this message]
2007-07-21 20:29 ` [PATCH 1/3] readahead: drop behind Eric St-Laurent
2007-07-21 20:37 ` Peter Zijlstra
2007-07-21 20:59 ` Eric St-Laurent
2007-07-21 21:06 ` Peter Zijlstra
2007-07-25 3:55 ` Eric St-Laurent
2007-07-21 21:00 ` [PATCH 2/3] readahead: fadvise drop behind controls Peter Zijlstra
2007-07-21 21:00 ` [PATCH 3/3] readahead: scale max readahead size depending on memory size Peter Zijlstra
2007-07-22 8:24 ` Jens Axboe
2007-07-22 8:36 ` Peter Zijlstra
2007-07-22 8:50 ` Jens Axboe
2007-07-22 9:17 ` Peter Zijlstra
2007-07-22 16:44 ` Jens Axboe
2007-07-23 10:04 ` Jörn Engel
2007-07-23 10:11 ` Jens Axboe
2007-07-23 22:44 ` Rusty Russell
2007-07-22 23:52 ` Rik van Riel
2007-07-23 5:22 ` Jens Axboe
2007-07-22 8:45 ` Fengguang Wu
2007-07-22 8:45 ` Fengguang Wu
2007-07-22 8:59 ` Peter Zijlstra
2007-07-22 9:53 ` Fengguang Wu
2007-07-22 9:53 ` Fengguang Wu
2007-07-22 2:39 ` [PATCH 0/3] readahead drop behind and size adjustment Fengguang Wu
2007-07-22 2:39 ` Fengguang Wu
2007-07-22 2:44 ` Dave Jones
2007-07-22 8:10 ` Fengguang Wu
2007-07-22 8:10 ` Fengguang Wu
2007-07-22 8:24 ` Peter Zijlstra
2007-07-22 8:29 ` Fengguang Wu
2007-07-22 8:29 ` Fengguang Wu
2007-07-22 8:33 ` Rusty Russell
2007-07-22 8:45 ` Peter Zijlstra
2007-07-23 9:00 ` Nick Piggin
2007-07-23 14:24 ` Fengguang Wu
2007-07-23 14:24 ` Fengguang Wu
2007-07-23 19:40 ` Andrew Morton
2007-07-24 0:47 ` Fengguang Wu
2007-07-24 0:47 ` Fengguang Wu
2007-07-24 1:17 ` Andrew Morton
2007-07-24 8:50 ` Andreas Dilger
2007-07-24 4:30 ` Nick Piggin
2007-07-25 4:35 ` Eric St-Laurent
2007-07-25 5:19 ` Nick Piggin
2007-07-25 6:18 ` Eric St-Laurent
2007-07-25 7:09 ` Nick Piggin
2007-07-25 7:48 ` Eric St-Laurent
2007-07-25 15:36 ` Rik van Riel
2007-07-25 15:33 ` Rik van Riel
2007-07-29 7:44 ` Eric St-Laurent
2007-07-25 15:28 ` Rik van Riel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070721210051.975382000@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=csnook@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lnxninja@us.ibm.com \
--cc=riel@redhat.com \
--cc=rusty@rustcorp.com.au \
--cc=wfg@mail.ustc.edu.cn \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.