From: Paul Gortmaker <p_gortmaker@yahoo.com>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: linux-kernel@vger.kernel.org
Subject: Making diff(1) of linux kernels faster
Date: Sun, 14 Oct 2001 04:58:29 -0400 [thread overview]
Message-ID: <3BC953B5.18870B14@yahoo.com> (raw)
A while ago somebody with too much memory was gloating that they
would do a "find ... xargs cat>/dev/null" on several 2.4.x trees
so that diff wouldn't thrash the disk with a million seeks :-)
Well, I taught diff to read each tree sequentially first, and the results
were quite surprising (linux-2.2 kernel, two identical 8 MB trees, on
some older hardware, average times reported; the new diff option is "-z").
diff -urN, nothing cached: 36 seconds
diff -urzN, nothing cached: 7.5 seconds (about 1/5 !!!!!)
diff -urN, all cached: 1.04 seconds
diff -urzN, all cached: 1.66 seconds
So, with the cold cache, my patch cut the time by a factor of 5(!!)
and the amount of audible death growls from the disk is also reduced.
In the warm case, you pay a slight penalty since the simple hack
doesn't try to keep the file data around while priming the cache.
Now if I only had enough RAM to personally test how much it helps
against a couple of 2.4.x kernel trees... other stats welcomed.
Paul.
diff -ruz orig/diffutils-2.7/diff.c diffutils-2.7/diff.c
--- orig/diffutils-2.7/diff.c Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.c Sun Oct 14 03:59:33 2001
@@ -206,6 +206,7 @@
{"exclude", 1, 0, 'x'},
{"exclude-from", 1, 0, 'X'},
{"side-by-side", 0, 0, 'y'},
+ {"zoom", 0, 0, 'z'},
{"unified", 2, 0, 'U'},
{"left-column", 0, 0, 129},
{"suppress-common-lines", 0, 0, 130},
@@ -244,7 +245,7 @@
/* Decode the options. */
while ((c = getopt_long (argc, argv,
- "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
+ "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yz",
longopts, 0)) != EOF)
{
switch (c)
@@ -493,6 +494,11 @@
specify_style (OUTPUT_SDIFF);
break;
+ case 'z':
+ /* Pre-read each tree sequentially to prime cache, avoid seeks. */
+ preread_tree = 1;
+ break;
+
case 'W':
/* Set the line width for OUTPUT_SDIFF. */
if (ck_atoi (optarg, &width) || width <= 0)
@@ -736,6 +742,7 @@
"-S FILE --starting-file=FILE Start with FILE when comparing directories.\n",
"--horizon-lines=NUM Keep NUM lines of the common prefix and suffix.",
"-d --minimal Try hard to find a smaller set of changes.",
+"-z --zoom Assume both trees (with -r) will fit into machine core.",
"-H --speed-large-files Assume large files and many scattered small changes.\n",
"-v --version Output version info.",
"--help Output this help.",
@@ -990,6 +997,15 @@
}
else
{
+
+ /* Sometimes faster to load each tree into OS's cache 1st */
+
+ if (depth == 0 && recursive && preread_tree)
+ {
+ preread(inf[0].name);
+ preread(inf[1].name);
+ }
+
val = diff_dirs (inf, compare_files, depth);
}
diff -ruz orig/diffutils-2.7/diff.h diffutils-2.7/diff.h
--- orig/diffutils-2.7/diff.h Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.h Fri Oct 12 11:50:43 2001
@@ -93,6 +93,9 @@
/* File labels for `-c' output headers (-L). */
EXTERN char *file_label[2];
+/* 1 if trees should be read sequentially to avoid seeks during recursive. */
+EXTERN int preread_tree;
+
struct regexp_list
{
struct re_pattern_buffer buf;
diff -ruz orig/diffutils-2.7/io.c diffutils-2.7/io.c
--- orig/diffutils-2.7/io.c Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/io.c Fri Oct 12 11:51:55 2001
@@ -182,6 +182,64 @@
current->buffer = xrealloc (current->buffer, current->bufsize);
}
}
+
+/* Preload the OS's cache with all files of one branch for recursive diffs.
+   DIR is the directory to walk.  A directory that cannot be opened, or an entry that cannot be stat'ed or opened, is skipped silently; nothing is returned.  */
+void
+preread (dir)
+ const char *dir;
+{
+ /* D is the stream opened on DIR; DENT is the entry currently being visited. */
+ DIR *d;
+ struct dirent *dent;
+
+ d = opendir(dir);
+ if (d == NULL) return;
+ /* Directories recurse, regular files are read, every other file type is ignored. */
+ while ((dent = readdir(d)) != NULL)
+ {
+ /* NAME is the entry's own name, PATH is DIR/NAME, F is a scratch record passed to slurp. */
+ char *name, *path;
+ struct file_data *f;
+
+ name = dent->d_name;
+ if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+ continue; /* Skip the "." and ".." entries. */
+ /* Start from an all-zero record; only its stat and desc members are set below. */
+ f = xmalloc(sizeof(struct file_data));
+ memset(f, 0, sizeof(struct file_data));
+ /* The +2 leaves room for the '/' separator and the terminating NUL. */
+ path = xmalloc(strlen(dir)+strlen(name)+2);
+ strcpy(path, dir);
+ strcat(path, "/");
+ strcat(path, name);
+ /* stat rather than lstat, so symbolic links are followed.  An entry that cannot be stat'ed is dropped. */
+ if (stat(path, &f->stat) != 0)
+ {
+ free(f);
+ free(path);
+ continue;
+ }
+
+ if (S_ISDIR(f->stat.st_mode))
+ preread(path);
+ else if (S_ISREG(f->stat.st_mode))
+ {
+ f->desc = open(path, O_RDONLY);
+ if (f->desc != -1)
+ {
+ slurp(f); /* NOTE(review): slurp is defined outside this hunk; presumably it reads the whole file into f->buffer, and every other field of F is still zero here -- confirm that is sufficient. */
+ if (f->bufsize != 0) /* The contents themselves are not kept, so the buffer is released at once. */
+ free(f->buffer);
+ close(f->desc);
+ }
+ }
+ free(path);
+ free(f);
+ }
+ closedir(d);
+}
+
\f
/* Split the file into lines, simultaneously computing the equivalence class for
each line. */
next reply other threads:[~2001-10-14 9:14 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2001-10-14 8:58 Paul Gortmaker [this message]
2001-10-14 9:51 ` Making diff(1) of linux kernels faster john slee
2001-10-14 15:48 ` Linus Torvalds
2001-10-17 12:25 ` Paul Gortmaker
2001-10-17 16:59 ` Linus Torvalds
2001-10-17 16:44 ` Marcelo Tosatti
2001-10-17 18:21 ` Linus Torvalds
2001-10-17 20:21 ` Andrea Arcangeli
2001-10-17 19:06 ` Marcelo Tosatti
2001-10-17 21:23 ` chris
2001-10-17 21:30 ` Andrea Arcangeli
2001-10-17 21:45 ` Linus Torvalds
2001-10-17 17:12 ` John Levon
2001-10-17 19:19 ` Benjamin LaHaise
2001-10-17 18:50 ` Andreas Schwab
-- strict thread matches above, loose matches on Subject: below --
2001-10-17 17:57 willy tarreau
2001-10-18 0:25 ` Horst von Brand
2001-10-18 8:02 ` Nick Craig-Wood
2001-10-18 9:55 ` Wojtek Pilorz
2001-10-18 11:18 ` vda
2001-10-18 12:39 Marco C. Mason
2001-10-18 14:48 Sean Neakums
2001-10-22 16:39 Andries.Brouwer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3BC953B5.18870B14@yahoo.com \
--to=p_gortmaker@yahoo.com \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@transmeta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.