All of lore.kernel.org
 help / color / mirror / Atom feed
From: Paul Gortmaker <p_gortmaker@yahoo.com>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: linux-kernel@vger.kernel.org
Subject: Making diff(1) of linux kernels faster
Date: Sun, 14 Oct 2001 04:58:29 -0400	[thread overview]
Message-ID: <3BC953B5.18870B14@yahoo.com> (raw)


A while ago somebody with too much memory was gloating that they 
would do a "find ... xargs cat>/dev/null" on several 2.4.x trees
so that diff wouldn't thrash the disk with a million seeks  :-)

Well, I taught diff to read each tree sequentially first, and the results
were quite surprising (linux-2.2 kernel, two identical 8 MB trees, on
some older hardware, average times reported; the new diff option is "-z").

   diff -urN, nothing cached:  36 seconds
   diff -urzN, nothing cached:  7.5 seconds  (about 1/5 !!!!!)

   diff -urN, all cached:  1.04 seconds
   diff -urzN, all cached: 1.66 seconds

So, with the cold cache, my patch cut the time by a factor of 5(!!)
and the amount of audible death growls from the disk is also reduced.  
In the warm case, you pay a slight penalty since the simple hack
doesn't try to keep the file data around while priming the cache.

Now if I only had enough RAM to personally test how much it helps
against a couple of 2.4.x kernel trees...  other stats welcomed.

Paul.

diff -ruz orig/diffutils-2.7/diff.c diffutils-2.7/diff.c
--- orig/diffutils-2.7/diff.c	Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.c	Sun Oct 14 03:59:33 2001
@@ -206,6 +206,7 @@
   {"exclude", 1, 0, 'x'},
   {"exclude-from", 1, 0, 'X'},
   {"side-by-side", 0, 0, 'y'},
+  {"zoom", 0, 0, 'z'},
   {"unified", 2, 0, 'U'},
   {"left-column", 0, 0, 129},
   {"suppress-common-lines", 0, 0, 130},
@@ -244,7 +245,7 @@
   /* Decode the options.  */
 
   while ((c = getopt_long (argc, argv,
-			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:y",
+			   "0123456789abBcC:dD:efF:hHiI:lL:nNpPqrsS:tTuU:vwW:x:X:yz",
 			   longopts, 0)) != EOF)
     {
       switch (c)
@@ -493,6 +494,11 @@
 	  specify_style (OUTPUT_SDIFF);
 	  break;
 
+	case 'z':
+	  /* Pre-read each tree sequentially to prime cache, avoid seeks. */
+	  preread_tree = 1;
+	  break;
+
 	case 'W':
 	  /* Set the line width for OUTPUT_SDIFF.  */
 	  if (ck_atoi (optarg, &width) || width <= 0)
@@ -736,6 +742,7 @@
 "-S FILE  --starting-file=FILE  Start with FILE when comparing directories.\n",
 "--horizon-lines=NUM  Keep NUM lines of the common prefix and suffix.",
 "-d  --minimal  Try hard to find a smaller set of changes.",
+"-z  --zoom  Assume both trees (with -r) will fit into machine core.",
 "-H  --speed-large-files  Assume large files and many scattered small changes.\n",
 "-v  --version  Output version info.",
 "--help  Output this help.",
@@ -990,6 +997,15 @@
 	}
       else
 	{
+
+          /* Sometimes faster to load each tree into OS's cache 1st */
+
+          if (depth == 0 && recursive && preread_tree)
+	    {
+              preread(inf[0].name);
+              preread(inf[1].name);
+            }
+		
 	  val = diff_dirs (inf, compare_files, depth);
 	}
 
diff -ruz orig/diffutils-2.7/diff.h diffutils-2.7/diff.h
--- orig/diffutils-2.7/diff.h	Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/diff.h	Fri Oct 12 11:50:43 2001
@@ -93,6 +93,9 @@
 /* File labels for `-c' output headers (-L).  */
 EXTERN char *file_label[2];
 
+/* 1 if trees should be read sequentially to avoid seeks during recursive. */
+EXTERN int	preread_tree;
+
 struct regexp_list
 {
   struct re_pattern_buffer buf;
diff -ruz orig/diffutils-2.7/io.c diffutils-2.7/io.c
--- orig/diffutils-2.7/io.c	Thu Sep 22 12:47:00 1994
+++ diffutils-2.7/io.c	Fri Oct 12 11:51:55 2001
@@ -182,6 +182,64 @@
       current->buffer = xrealloc (current->buffer, current->bufsize);
     }
 }
+
+/* Preload the OS's cache with all files of one branch for recursive diffs:
+   each regular file under DIR is opened and slurp()ed, subdirectories are recursed.  */
+void
+preread (dir)
+	const char *dir;
+{
+  /* Best effort: anything that fails to open or stat is skipped silently.  */
+  DIR *d;
+  struct dirent *dent;
+
+  d = opendir(dir);
+  if (d == NULL) return;
+
+  while ((dent = readdir(d)) != NULL)
+    {
+      /* "." and ".." are skipped by the name test below.  */
+      char *name, *path;
+      struct file_data *f;
+
+      name = dent->d_name;
+      if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
+            continue;
+      /* Zero-filled scratch file_data; this function touches only stat, desc, bufsize, buffer.  */
+      f = xmalloc(sizeof(struct file_data));
+      memset(f, 0, sizeof(struct file_data));
+      /* +2 leaves room for the "/" separator and the terminating NUL.  */
+      path = xmalloc(strlen(dir)+strlen(name)+2);
+      strcpy(path, dir);
+      strcat(path, "/");
+      strcat(path, name);
+
+      if (stat(path, &f->stat) != 0)
+        {
+           free(f);
+           free(path);
+           continue;
+        }
+      /* NOTE(review): stat() follows symlinks, so symlinked directories are descended too.  */
+      if (S_ISDIR(f->stat.st_mode))
+           preread(path);
+      else if (S_ISREG(f->stat.st_mode))
+        {
+          f->desc = open(path, O_RDONLY);
+          if (f->desc != -1)
+            {
+              slurp(f); /* presumably reads the file through f->desc -- confirm in slurp() */
+              if (f->bufsize != 0)
+                free(f->buffer); /* contents are not kept */
+              close(f->desc);
+            }
+        } 
+      free(path);
+      free(f); 
+  }
+  closedir(d);
+}
+
 \f
 /* Split the file into lines, simultaneously computing the equivalence class for
    each line. */



             reply	other threads:[~2001-10-14  9:14 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2001-10-14  8:58 Paul Gortmaker [this message]
2001-10-14  9:51 ` Making diff(1) of linux kernels faster john slee
2001-10-14 15:48 ` Linus Torvalds
2001-10-17 12:25   ` Paul Gortmaker
2001-10-17 16:59     ` Linus Torvalds
2001-10-17 16:44       ` Marcelo Tosatti
2001-10-17 18:21         ` Linus Torvalds
2001-10-17 20:21           ` Andrea Arcangeli
2001-10-17 19:06             ` Marcelo Tosatti
2001-10-17 21:23             ` chris
2001-10-17 21:30               ` Andrea Arcangeli
2001-10-17 21:45               ` Linus Torvalds
2001-10-17 17:12       ` John Levon
2001-10-17 19:19       ` Benjamin LaHaise
2001-10-17 18:50     ` Andreas Schwab
  -- strict thread matches above, loose matches on Subject: below --
2001-10-17 17:57 willy tarreau
2001-10-18  0:25 ` Horst von Brand
2001-10-18  8:02   ` Nick Craig-Wood
2001-10-18  9:55     ` Wojtek Pilorz
2001-10-18 11:18       ` vda
2001-10-18 12:39 Marco C. Mason
2001-10-18 14:48 Sean Neakums
2001-10-22 16:39 Andries.Brouwer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3BC953B5.18870B14@yahoo.com \
    --to=p_gortmaker@yahoo.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.