From: Robert Love <rml@tech9.net>
To: linux-kernel@vger.kernel.org
Cc: akpm@digeo.com, riel@conectiva.com.br
Subject: [PATCH] O_STREAMING - flag for optimal streaming I/O
Date: 07 Oct 2002 22:38:55 -0400 [thread overview]
Message-ID: <1034044736.29463.318.camel@phantasy> (raw)
Attached patch implements an O_STREAMING file I/O flag which enables
manual drop-behind of pages.
If the file has O_STREAMING set then the user has explicitly said "this
is streaming data, I know I will not revisit this, do not cache
anything". So we drop pages from the pagecache before our current
index. We have to fiddle a bit to get writes working since we do
write-behind but the logic is there and it works.
Some numbers. A simple streaming read to verify the pagecache effects:
Streaming 1GB Read (avg of many runs, mem=2GB):
O_STREAMING Wall time Change in Page Cache
Yes 25.58s 0
No 25.55s +835MB
Another read with much more VM pressure:
Streaming 1GB Read (avg of many runs, mem=8M)
O_STREAMING Wall time Change in Page Cache
Yes 25.76s 0
No 29.01s +1MB
And now the kicker:
Kernel compile (make -j2) and concurrent streaming I/O
(avg of two runs, mem=128M):
O_STREAMING Time to complete Kernel Compile
Yes 3m27.863s
No 4m15.818s
This is c/o Andrew Morton.
Patch is against 2.4.20-pre9. Why not 2.5? Because Andrew says we can
do better, perhaps with a real drop-behind heuristic. As 20 Oct looms
quite close, we shall see.
Robert Love
Implement O_STREAMING for streaming I/O for manual drop-behind of pages.
include/asm-arm/fcntl.h | 1
include/asm-i386/fcntl.h | 1
include/asm-mips/fcntl.h | 1
include/asm-ppc/fcntl.h | 1
include/asm-sh/fcntl.h | 1
mm/filemap.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 94 insertions(+)
diff -urN linux-2.4.20-pre9/include/asm-arm/fcntl.h linux/include/asm-arm/fcntl.h
--- linux-2.4.20-pre9/include/asm-arm/fcntl.h 2002-10-06 14:57:26.000000000 -0400
+++ linux/include/asm-arm/fcntl.h 2002-10-07 18:45:51.000000000 -0400
@@ -20,6 +20,7 @@
#define O_NOFOLLOW 0100000 /* don't follow links */
#define O_DIRECT 0200000 /* direct disk access hint - currently ignored */
#define O_LARGEFILE 0400000
+#define O_STREAMING 04000000 /* streaming access */
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
diff -urN linux-2.4.20-pre9/include/asm-i386/fcntl.h linux/include/asm-i386/fcntl.h
--- linux-2.4.20-pre9/include/asm-i386/fcntl.h 2002-10-06 14:57:21.000000000 -0400
+++ linux/include/asm-i386/fcntl.h 2002-10-07 18:45:51.000000000 -0400
@@ -20,6 +20,7 @@
#define O_LARGEFILE 0100000
#define O_DIRECTORY 0200000 /* must be a directory */
#define O_NOFOLLOW 0400000 /* don't follow links */
+#define O_STREAMING 04000000 /* streaming access */
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
diff -urN linux-2.4.20-pre9/include/asm-mips/fcntl.h linux/include/asm-mips/fcntl.h
--- linux-2.4.20-pre9/include/asm-mips/fcntl.h 2002-10-06 14:57:21.000000000 -0400
+++ linux/include/asm-mips/fcntl.h 2002-10-07 18:45:51.000000000 -0400
@@ -26,6 +26,7 @@
#define O_DIRECT 0x8000 /* direct disk access hint */
#define O_DIRECTORY 0x10000 /* must be a directory */
#define O_NOFOLLOW 0x20000 /* don't follow links */
+#define O_STREAMING 0x400000 /* streaming access */
#define O_NDELAY O_NONBLOCK
diff -urN linux-2.4.20-pre9/include/asm-ppc/fcntl.h linux/include/asm-ppc/fcntl.h
--- linux-2.4.20-pre9/include/asm-ppc/fcntl.h 2002-10-06 14:57:22.000000000 -0400
+++ linux/include/asm-ppc/fcntl.h 2002-10-07 18:45:51.000000000 -0400
@@ -23,6 +23,7 @@
#define O_NOFOLLOW 0100000 /* don't follow links */
#define O_LARGEFILE 0200000
#define O_DIRECT 0400000 /* direct disk access hint */
+#define O_STREAMING 04000000 /* streaming access */
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
diff -urN linux-2.4.20-pre9/include/asm-sh/fcntl.h linux/include/asm-sh/fcntl.h
--- linux-2.4.20-pre9/include/asm-sh/fcntl.h 2002-10-06 14:57:27.000000000 -0400
+++ linux/include/asm-sh/fcntl.h 2002-10-07 18:45:51.000000000 -0400
@@ -20,6 +20,7 @@
#define O_LARGEFILE 0100000
#define O_DIRECTORY 0200000 /* must be a directory */
#define O_NOFOLLOW 0400000 /* don't follow links */
+#define O_STREAMING 04000000 /* streaming access */
#define F_DUPFD 0 /* dup */
#define F_GETFD 1 /* get close_on_exec */
diff -urN linux-2.4.20-pre9/mm/filemap.c linux/mm/filemap.c
--- linux-2.4.20-pre9/mm/filemap.c 2002-10-06 14:57:20.000000000 -0400
+++ linux/mm/filemap.c 2002-10-07 18:45:51.000000000 -0400
@@ -1322,6 +1322,90 @@
SetPageReferenced(page);
}
+/**
+ * shrink_list - non-blockingly drop pages from the given cache list
+ * @mapping: the mapping from which we want to drop pages (not used here)
+ * @list: which list (e.g. locked, dirty, clean)?
+ * @max_index: greatest index from which we will drop pages
+ *
+ * Walks @list backwards with pagemap_lru_lock and pagecache_lock held and
+ * returns the number of pages dropped.
+ */
+static unsigned long shrink_list(struct address_space *mapping,
+				 struct list_head *list,
+				 unsigned long max_index)
+{
+	struct list_head *curr;
+	unsigned long nr_shrunk = 0;
+
+	spin_lock(&pagemap_lru_lock);
+	spin_lock(&pagecache_lock);
+	/* read the tail only under the locks so it cannot go stale */
+	curr = list->prev;
+	while (curr != list) {
+		struct page *page = list_entry(curr, struct page, list);
+
+		/* step first: the page may be removed below */
+		curr = curr->prev;
+		if (page->index > max_index || PageDirty(page))
+			continue;
+
+		/* never wait for a page lock: abandon the scan instead */
+		if (TryLockPage(page))
+			break;
+		if (page->buffers && !try_to_release_page(page, 0)) {
+			/* probably dirty buffers */
+			unlock_page(page);
+			break;
+		}
+		if (page_count(page) != 1) {
+			/* reference count is not exactly one: keep it */
+			unlock_page(page);
+			continue;
+		}
+
+		__lru_cache_del(page);
+		__remove_inode_page(page);
+		unlock_page(page);
+		page_cache_release(page);
+		nr_shrunk++;
+	}
+
+	spin_unlock(&pagecache_lock);
+	spin_unlock(&pagemap_lru_lock);
+	return nr_shrunk;
+}
+
+/**
+ * shrink_pagecache - drop a file's cached pages without blocking
+ * @file: the file we are doing I/O on
+ * @max_index: highest page index from which pages may be dropped
+ *
+ * Backs O_STREAMING, by which the user says the data is streamed and will
+ * not be revisited, so none of it needs to stay cached.
+ *
+ * Pages above @max_index are left alone so that readahead is not thrown away.
+ *
+ * Returns the number of pages dropped across the mapping's three lists.
+ */
+static unsigned long shrink_pagecache(struct file *file,
+				      unsigned long max_index)
+{
+	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
+	unsigned long nr_dropped;
+
+	/* too little cached to be worth a scan */
+	if (mapping->nrpages < 256)
+		return 0;
+
+	/* scan the locked, clean and dirty lists in that order */
+	nr_dropped = shrink_list(mapping, &mapping->locked_pages, max_index);
+	nr_dropped += shrink_list(mapping, &mapping->clean_pages, max_index);
+	nr_dropped += shrink_list(mapping, &mapping->dirty_pages, max_index);
+
+	return nr_dropped;
+}
+
/*
* This is a generic file read routine, and uses the
* inode->i_op->readpage() function for the actual low-level
@@ -1538,6 +1622,8 @@
filp->f_reada = 1;
if (cached_page)
page_cache_release(cached_page);
+ if (filp->f_flags & O_STREAMING)
+ shrink_pagecache(filp, index);
UPDATE_ATIME(inode);
}
@@ -3047,6 +3133,9 @@
if (file->f_flags & O_DIRECT)
goto o_direct;
+ if (file->f_flags & O_STREAMING)
+ shrink_pagecache(file, pos >> PAGE_CACHE_SHIFT);
+
do {
unsigned long index, offset;
long page_fault;
next reply other threads:[~2002-10-08 2:33 UTC|newest]
Thread overview: 68+ messages / expand[flat|nested] mbox.gz Atom feed top
2002-10-08 2:38 Robert Love [this message]
2002-10-08 10:42 ` [PATCH] O_STREAMING - flag for optimal streaming I/O J.A. Magallon
2002-10-08 18:08 ` Robert Love
2002-10-08 18:38 ` Chris Wedgwood
2002-10-08 18:49 ` Robert Love
2002-10-08 19:05 ` Chris Wedgwood
2002-10-08 19:17 ` Robert Love
2002-10-08 19:30 ` Andrew Morton
2002-10-09 14:14 ` Marco Colombo
2002-10-09 16:30 ` kernel
2002-10-08 19:52 ` Chris Wedgwood
2002-10-08 19:59 ` Robert Love
2002-10-08 20:01 ` Chris Wedgwood
2002-10-09 8:33 ` Giuliano Pochini
2002-10-09 8:43 ` Andrew Morton
2002-10-09 10:55 ` Giuliano Pochini
2002-10-09 17:05 ` Mark Mielke
2002-10-09 19:36 ` Giuliano Pochini
2002-10-09 22:24 ` Mark Mielke
2002-10-09 23:20 ` Jamie Lokier
2002-10-10 3:07 ` Mark Mielke
2002-10-10 10:55 ` Helge Hafting
2002-10-10 17:50 ` Mark Mielke
2002-10-10 3:29 ` Erik Andersen
2002-10-10 3:37 ` Robert Love
2002-10-10 13:39 ` Giuliano Pochini
2002-10-10 22:50 ` Mike Fedyk
2002-10-10 22:58 ` Erik Andersen
2002-10-11 8:26 ` Giuliano Pochini
2002-10-11 8:32 ` Helge Hafting
2002-10-10 8:33 ` Giuliano Pochini
2002-10-10 9:10 ` Erik Andersen
2002-10-10 9:38 ` Giuliano Pochini
2002-10-10 10:40 ` Miquel van Smoorenburg
2002-10-10 11:01 ` Helge Hafting
2002-10-10 12:29 ` Xavier Bestel
2002-10-10 13:17 ` Giuliano Pochini
2002-10-10 22:44 ` Mike Fedyk
2002-10-11 8:13 ` Giuliano Pochini
2002-10-10 11:38 ` O_STREAMING has insufficient info - how about fadvise() ? Alan Cox
2002-10-10 11:47 ` William Lee Irwin III
2002-10-10 15:34 ` Andrew Morton
2002-10-10 16:08 ` Alan Cox
2002-10-10 16:49 ` Oliver Xymoron
2002-10-10 15:37 ` [PATCH] O_STREAMING - flag for optimal streaming I/O Gerhard Mack
2002-10-10 22:47 ` Mike Fedyk
2002-10-11 2:14 ` Gerhard Mack
2002-10-11 8:10 ` Chris Wedgwood
2002-10-10 9:14 ` David Lang
2002-10-10 14:51 ` Denis Vlasenko
2002-10-08 19:53 ` Matthias Schniedermeyer
2002-10-08 19:59 ` Chris Wedgwood
2002-10-08 20:03 ` Andrew Morton
2002-10-08 20:34 ` Matthias Schniedermeyer
2002-10-08 20:42 ` Andrew Morton
2002-10-08 20:37 ` Larry McVoy
2002-10-09 11:53 ` Roy Sigurd Karlsbakk
2002-10-09 14:10 ` Marco Colombo
2002-10-09 14:14 ` Robert Love
2002-10-09 14:33 ` Richard B. Johnson
2002-10-09 15:27 ` Andreas Dilger
2002-10-09 23:17 ` Jamie Lokier
2002-10-09 23:46 ` Rik van Riel
2002-10-10 0:16 ` Jamie Lokier
2002-10-10 2:39 ` Erik Andersen
2002-10-10 10:33 ` Marco Colombo
2002-10-10 20:00 ` Erik Andersen
-- strict thread matches above, loose matches on Subject: below --
2002-10-11 4:16 Hank Leininger
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1034044736.29463.318.camel@phantasy \
--to=rml@tech9.net \
--cc=akpm@digeo.com \
--cc=linux-kernel@vger.kernel.org \
--cc=riel@conectiva.com.br \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.