public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Paul Gortmaker <p_gortmaker@yahoo.com>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: linux-kernel@vger.kernel.org
Subject: Making diff(1) of linux kernels faster
Date: Sun, 14 Oct 2001 04:58:29 -0400	[thread overview]
Message-ID: <3BC953B5.18870B14@yahoo.com> (raw)


A while ago somebody with too much memory was gloating that they 
would do a "find ... xargs cat>/dev/null" on several 2.4.x trees
so that diff wouldn't thrash the disk with a million seeks  :-)

Well, I taught diff to read each tree sequentially first, and the results
were quite surprising (linux-2.2 kernel, two identical 8 MB trees, on
some older hardware; average times reported; the new diff option is "-z").

   diff -urN, nothing cached:  36 seconds
   diff -urzN, nothing cached:  7.5 seconds  (about 1/5 !!!!!)

   diff -urN, all cached:  1.04 seconds
   diff -urzN, all cached: 1.66 seconds

So, with the cold cache, my patch cut the time by a factor of 5(!!)
and the amount of audible death growls from the disk is also reduced.  
In the warm case, you pay a slight penalty since the simple hack
doesn't try to keep the file data around while priming the cache.

Now if I only had enough RAM to personally test how much it helps
against a couple of 2.4.x kernel trees...  Other stats are welcome.

Paul.

diff -ruz orig/diffutils-2.7/diff.c diffutils-2.7/diff.c
--- orig/diffutils-2.7/diff.c	Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.c	Sun Oct 14 03:59:33 2001
@@ -206,6 +206,7 @@
   {"exclude", 1, 0, 'x'},
   {"exclude-from", 1, 0, 'X'},
   {"side-by-side", 0, 0, 'y'},
+  {"zoom", 0, 0, 'z'},
   {"unified", 2, 0, 'U'},
   {"left-column", 0, 0, 129},
   {"suppress-common-lines", 0, 0, 130},
@@ -244,7 +245,7 @@
   /* Decode the options.  */
 
   while ((c = getopt_long (argc, argv,
-			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
+			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yz",
 			   longopts, 0)) != EOF)
     {
       switch (c)
@@ -493,6 +494,11 @@
 	  specify_style (OUTPUT_SDIFF);
 	  break;
 
+	case 'z':
+	  /* Pre-read each tree sequentially to prime cache, avoid seeks. */
+	  preread_tree = 1;
+	  break;
+
 	case 'W':
 	  /* Set the line width for OUTPUT_SDIFF.  */
 	  if (ck_atoi (optarg, &width) || width <= 0)
@@ -736,6 +742,7 @@
 "-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
 "--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
 "-d  --minimal  Try hard to find a smaller set of changes.",
+"-z  --zoom  Assume both trees (with -r) will fit into machine core.",
 "-H  --speed-large-files  Assume large files and many scattered small changes.\n",
 "-v  --version  Output version info.",
 "--help  Output this help.",
@@ -990,6 +997,15 @@
 	}
       else
 	{
+
+          /* Sometimes faster to load each tree into OS's cache 1st */
+
+          if (depth == 0 && recursive && preread_tree)
+	    {
+              preread(inf[0].name);
+              preread(inf[1].name);
+            }
+		
 	  val = diff_dirs (inf, compare_files, depth);
 	}
 
diff -ruz orig/diffutils-2.7/diff.h diffutils-2.7/diff.h
--- orig/diffutils-2.7/diff.h	Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.h	Fri Oct 12 11:50:43 2001
@@ -93,6 +93,9 @@
 /* File labels for `-c' output headers (-L).  */
 EXTERN char *file_label[2];
 
+/* 1 if trees should be read sequentially to avoid seeks during recursive. */
+EXTERN int	preread_tree;
+
 struct regexp_list
 {
   struct re_pattern_buffer buf;
diff -ruz orig/diffutils-2.7/io.c diffutils-2.7/io.c
--- orig/diffutils-2.7/io.c	Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/io.c	Fri Oct 12 11:51:55 2001
@@ -182,6 +182,64 @@
       current->buffer = xrealloc (current->buffer, current->bufsize);
     }
 }
+
+/* Preload the OS's cache with all files of one branch for recursive diffs:
+   walk DIR recursively, read each regular file once and discard the data.  */
+void
+preread (dir)
+	const char *dir;
+{
+  /* A directory that cannot be opened is silently skipped. */
+  DIR *d;
+  struct dirent *dent;
+
+  d = opendir(dir);
+  if (d == NULL) return;
+
+  while ((dent = readdir(d)) != NULL)
+    {
+
+      char *name, *path;
+      struct file_data *f;
+      /* Skip the "." and ".." entries. */
+      name = dent->d_name;
+      if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+            continue;
+      /* Zeroed scratch file_data; this function touches stat, desc, bufsize, buffer. */
+      f = xmalloc(sizeof(struct file_data));
+      memset(f, 0, sizeof(struct file_data));
+      /* Build "DIR/NAME"; the +2 covers the '/' separator and the trailing NUL. */
+      path = xmalloc(strlen(dir)+strlen(name)+2);
+      strcpy(path, dir);
+      strcat(path, "/");
+      strcat(path, name);
+      /* Entries that cannot be stat()ed are skipped after releasing both buffers. */
+      if (stat(path, &f->stat) != 0)
+        {
+           free(f);
+           free(path);
+           continue;
+        }
+      /* Recurse into directories, read regular files, ignore every other type. */
+      if (S_ISDIR(f->stat.st_mode))
+           preread(path); /* NOTE(review): stat() follows symlinks; a link loop may recurse forever -- confirm */
+      else if (S_ISREG(f->stat.st_mode))
+        {
+          f->desc = open(path, O_RDONLY);
+          if (f->desc != -1) /* files that fail to open are skipped */
+            {
+              slurp(f); /* presumably reads the whole file into f->buffer -- confirm against slurp() */
+              if (f->bufsize != 0)
+                free(f->buffer); /* data is discarded; only the read itself matters */
+              close(f->desc);
+            }
+        } 
+      free(path);
+      free(f); 
+  }
+  closedir(d);
+}
+
 \f
 /* Split the file into lines, simultaneously computing the equivalence class for
    each line. */



             reply	other threads:[~2001-10-14  9:14 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-10-14  8:58 Paul Gortmaker [this message]
2001-10-14  9:51 ` Making diff(1) of linux kernels faster john slee
2001-10-14 15:48 ` Linus Torvalds
2001-10-17 12:25   ` Paul Gortmaker
2001-10-17 16:59     ` Linus Torvalds
2001-10-17 16:44       ` Marcelo Tosatti
2001-10-17 18:21         ` Linus Torvalds
2001-10-17 20:21           ` Andrea Arcangeli
2001-10-17 19:06             ` Marcelo Tosatti
2001-10-17 21:23             ` chris
2001-10-17 21:30               ` Andrea Arcangeli
2001-10-17 21:45               ` Linus Torvalds
2001-10-17 17:12       ` John Levon
2001-10-17 19:19       ` Benjamin LaHaise
2001-10-17 18:50     ` Andreas Schwab
  -- strict thread matches above, loose matches on Subject: below --
2001-10-17 17:57 willy tarreau
2001-10-18  0:25 ` Horst von Brand
2001-10-18  8:02   ` Nick Craig-Wood
2001-10-18  9:55     ` Wojtek Pilorz
2001-10-18 11:18       ` vda
2001-10-18 12:39 Marco C. Mason
2001-10-18 14:48 Sean Neakums
2001-10-22 16:39 Andries.Brouwer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3BC953B5.18870B14@yahoo.com \
    --to=p_gortmaker@yahoo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox