From: Paul Gortmaker <p_gortmaker@yahoo.com>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: linux-kernel@vger.kernel.org
Subject: Making diff(1) of linux kernels faster
Date: Sun, 14 Oct 2001 04:58:29 -0400 [thread overview]
Message-ID: <3BC953B5.18870B14@yahoo.com> (raw)
A while ago somebody with too much memory was gloating that they
would do a "find ... | xargs cat >/dev/null" on several 2.4.x trees
so that diff wouldn't thrash the disk with a million seeks :-)
Well, I taught diff to read each tree sequentially first, and the results
were quite surprising (linux-2.2 kernel, two identical 8 MB trees, on
some older hardware, average times reported, new diff option is "-z").
diff -urN, nothing cached: 36 seconds
diff -urzN, nothing cached: 7.5 seconds (about 1/5 !!!!!)
diff -urN, all cached: 1.04 seconds
diff -urzN, all cached: 1.66 seconds
So, with the cold cache, my patch cut the time by a factor of 5(!!)
and the amount of audible death growls from the disk is also reduced.
In the warm case, you pay a slight penalty since the simple hack
doesn't try to keep the file data around while priming the cache.
Now if I only had enough RAM to personally test how much it helps
against a couple of 2.4.x kernel trees... other stats welcomed.
Paul.
diff -ruz orig/diffutils-2.7/diff.c diffutils-2.7/diff.c
--- orig/diffutils-2.7/diff.c Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.c Sun Oct 14 03:59:33 2001
@@ -206,6 +206,7 @@
{"exclude", 1, 0, 'x'},
{"exclude-from", 1, 0, 'X'},
{"side-by-side", 0, 0, 'y'},
+ {"zoom", 0, 0, 'z'},
{"unified", 2, 0, 'U'},
{"left-column", 0, 0, 129},
{"suppress-common-lines", 0, 0, 130},
@@ -244,7 +245,7 @@
/* Decode the options. */
while ((c = getopt_long (argc, argv,
- "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
+ "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yz",
longopts, 0)) != EOF)
{
switch (c)
@@ -493,6 +494,11 @@
specify_style (OUTPUT_SDIFF);
break;
+ case 'z':
+ /* Pre-read each tree sequentially to prime cache, avoid seeks. */
+ preread_tree = 1;
+ break;
+
case 'W':
/* Set the line width for OUTPUT_SDIFF. */
if (ck_atoi (optarg, &width) || width <= 0)
@@ -736,6 +742,7 @@
"-S FILE --starting-file=FILE Start with FILE when comparing directories.\n",
"--horizon-lines=NUM Keep NUM lines of the common prefix and suffix.",
"-d --minimal Try hard to find a smaller set of changes.",
+"-z --zoom Assume both trees (with -r) will fit into machine core.",
"-H --speed-large-files Assume large files and many scattered small changes.\n",
"-v --version Output version info.",
"--help Output this help.",
@@ -990,6 +997,15 @@
}
else
{
+
+ /* Sometimes faster to load each tree into OS's cache 1st */
+
+ if (depth == 0 && recursive && preread_tree)
+ {
+ preread(inf[0].name);
+ preread(inf[1].name);
+ }
+
val = diff_dirs (inf, compare_files, depth);
}
diff -ruz orig/diffutils-2.7/diff.h diffutils-2.7/diff.h
--- orig/diffutils-2.7/diff.h Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.h Fri Oct 12 11:50:43 2001
@@ -93,6 +93,9 @@
/* File labels for `-c' output headers (-L). */
EXTERN char *file_label[2];
+/* 1 if trees should be read sequentially to avoid seeks during recursive diffs. */
+EXTERN int preread_tree;
+
struct regexp_list
{
struct re_pattern_buffer buf;
diff -ruz orig/diffutils-2.7/io.c diffutils-2.7/io.c
--- orig/diffutils-2.7/io.c Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/io.c Fri Oct 12 11:51:55 2001
@@ -182,6 +182,64 @@
current->buffer = xrealloc (current->buffer, current->bufsize);
}
}
+
+/* Preload the OS's cache with all files of one branch for recursive diffs: walk DIR recursively and pass every regular file through slurp once, discarding the data. */
+ /* Best effort: a directory that cannot be opened, or an entry that cannot be stat'ed or opened, is silently skipped. Returns nothing. */
+void
+preread (dir)
+ const char *dir;
+{
+
+ DIR *d;
+ struct dirent *dent;
+
+ d = opendir(dir);
+ if (d == NULL) return; /* unreadable or non-directory DIR: nothing to preload */
+
+ while ((dent = readdir(d)) != NULL)
+ {
+ /* Per-entry scratch: NAME points into DENT; PATH and F are heap-allocated and freed before the next iteration. */
+ char *name, *path;
+ struct file_data *f;
+
+ name = dent->d_name;
+ if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+ continue; /* skip the "." and ".." entries */
+
+ f = xmalloc(sizeof(struct file_data));
+ memset(f, 0, sizeof(struct file_data)); /* zeroed so unused file_data fields start out as 0 */
+ /* Build "DIR/NAME"; the extra 2 bytes hold the "/" separator and the terminating NUL. */
+ path = xmalloc(strlen(dir)+strlen(name)+2);
+ strcpy(path, dir);
+ strcat(path, "/");
+ strcat(path, name);
+ /* stat (not lstat) is used, so symbolic links are followed. NOTE(review): a symlinked directory will be descended into below -- confirm that is intended. */
+ if (stat(path, &f->stat) != 0)
+ {
+ free(f);
+ free(path);
+ continue;
+ }
+ /* Directories recurse (D stays open meanwhile); regular files are read once; any other file type is ignored. */
+ if (S_ISDIR(f->stat.st_mode))
+ preread(path);
+ else if (S_ISREG(f->stat.st_mode))
+ {
+ f->desc = open(path, O_RDONLY);
+ if (f->desc != -1)
+ {
+ slurp(f); /* presumably reads the whole file into f->buffer (slurp is defined elsewhere) -- TODO confirm */
+ if (f->bufsize != 0)
+ free(f->buffer); /* the contents are not kept; only the read itself matters here */
+ close(f->desc);
+ }
+ }
+ free(path);
+ free(f);
+ }
+ closedir(d);
+}
+
\f
/* Split the file into lines, simultaneously computing the equivalence class for
each line. */
next reply other threads:[~2001-10-14 9:14 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2001-10-14 8:58 Paul Gortmaker [this message]
2001-10-14 9:51 ` Making diff(1) of linux kernels faster john slee
2001-10-14 15:48 ` Linus Torvalds
2001-10-17 12:25 ` Paul Gortmaker
2001-10-17 16:59 ` Linus Torvalds
2001-10-17 16:44 ` Marcelo Tosatti
2001-10-17 18:21 ` Linus Torvalds
2001-10-17 20:21 ` Andrea Arcangeli
2001-10-17 19:06 ` Marcelo Tosatti
2001-10-17 21:23 ` chris
2001-10-17 21:30 ` Andrea Arcangeli
2001-10-17 21:45 ` Linus Torvalds
2001-10-17 17:12 ` John Levon
2001-10-17 19:19 ` Benjamin LaHaise
2001-10-17 18:50 ` Andreas Schwab
-- strict thread matches above, loose matches on Subject: below --
2001-10-17 17:57 willy tarreau
2001-10-18 0:25 ` Horst von Brand
2001-10-18 8:02 ` Nick Craig-Wood
2001-10-18 9:55 ` Wojtek Pilorz
2001-10-18 11:18 ` vda
2001-10-18 12:39 Marco C. Mason
2001-10-18 14:48 Sean Neakums
2001-10-22 16:39 Andries.Brouwer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3BC953B5.18870B14@yahoo.com \
--to=p_gortmaker@yahoo.com \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox