From: Junio C Hamano <junkio@cox.net>
To: Linus Torvalds <torvalds@osdl.org>
Cc: git@vger.kernel.org
Subject: [PATCH] Optimize diff-cache -p --cached
Date: Tue, 03 May 2005 15:10:25 -0700 [thread overview]
Message-ID: <7vk6mgt0z2.fsf@assigned-by-dhcp.cox.net> (raw)
This patch optimizes "diff-cache -p --cached" by not inflating
blobs into temporary files when either the blob recorded in the
cache or the one in the compared tree matches the corresponding
file in the work tree.
Here is an informal benchmark on my Duron 750. The tests were
done by running the unpatched and patched versions alternately a
number of times, and these are the last three pairs:
real 0m0.738s user 0m0.630s sys 0m0.100s unpatched
real 0m0.695s user 0m0.590s sys 0m0.100s patched
real 0m0.733s user 0m0.560s sys 0m0.170s unpatched
real 0m0.705s user 0m0.590s sys 0m0.110s patched
real 0m0.732s user 0m0.550s sys 0m0.180s unpatched
real 0m0.692s user 0m0.590s sys 0m0.100s patched
The benchmark was run in a fully checked out linux-2.6 GIT
repository. The work tree matched one commit, and comparison
was done against another commit which was 20-or-so commits
before the work tree.
$ new=a6ad57fb4b5e9d68553f4440377b99f75588fa88
$ old=cd63499cbe37e53e6cc084c8a35d911a4613c797
$ git-read-tree $new
$ git-checkout-cache -f -a
$ git-update-cache --refresh
$ git-rev-tree $new ^$old | wc -l
19
$ export GIT_EXTERNAL_DIFF=true
$ time git-diff-cache -p --cached $old
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
diff-tree-helper.c | 6 ++--
diff.c | 67 ++++++++++++++++++++++++++++++++++++++++++-----------
diff.h | 13 +++-------
3 files changed, 62 insertions(+), 24 deletions(-)
# - 2: Use GIT_EXTERNAL_DIFF exit status to terminate diff early.
# + 5: diff-cache --cached case optimization.
--- a/diff-tree-helper.c
+++ b/diff-tree-helper.c
@@ -35,7 +35,7 @@ static int parse_oneside_change(const ch
if (strncmp(cp, "\tblob\t", 6))
return -1;
cp += 6;
- if (get_sha1_hex(cp, one->u.sha1))
+ if (get_sha1_hex(cp, one->blob_sha1))
return -1;
cp += 40;
if (*cp++ != '\t')
@@ -83,13 +83,13 @@ static int parse_diff_tree_output(const
if (strncmp(cp, "\tblob\t", 6))
return -1;
cp += 6;
- if (get_sha1_hex(cp, old.u.sha1))
+ if (get_sha1_hex(cp, old.blob_sha1))
return -1;
cp += 40;
if (strncmp(cp, "->", 2))
return -1;
cp += 2;
- if (get_sha1_hex(cp, new.u.sha1))
+ if (get_sha1_hex(cp, new.blob_sha1))
return -1;
cp += 40;
if (*cp++ != '\t')
--- a/diff.c
+++ b/diff.c
@@ -132,11 +132,50 @@ static void builtin_diff(const char *nam
execlp("/bin/sh","sh", "-c", cmd, NULL);
}
+/*
+ * Given a name and sha1 pair, if the dircache tells us the file in
+ * the work tree has that object contents, return true, so that
+ * prepare_temp_file() does not have to inflate and extract.
+ */
+static int work_tree_matches(const char *name, const unsigned char *sha1)
+{
+ struct cache_entry *ce;
+ struct stat st;
+ int pos, len;
+
+ /* We do not read the cache ourselves here, because the
+ * benchmark with my previous version that always reads cache
+ * shows that it makes things worse for diff-tree comparing
+ * two linux-2.6 kernel trees in an already checked out work
+ * tree. This is because most diff-tree comparison deals with
+ * only a small number of files, while reading the cache is
+ * expensive for a large project, and its cost outweighs the
+ * savings we get by not inflating the object to a temporary
+ * file. Practically, this code only helps when we are used
+ * by diff-cache --cached, which does read the cache before
+ * calling us.
+ */
+ if (!active_cache)
+ return 0;
+
+ len = strlen(name);
+ pos = cache_name_pos(name, len);
+ if (pos < 0)
+ return 0;
+ ce = active_cache[pos];
+ if ((stat(name, &st) < 0) ||
+ cache_match_stat(ce, &st) ||
+ memcmp(sha1, ce->sha1, 20))
+ return 0;
+ return 1;
+}
+
static void prepare_temp_file(const char *name,
struct diff_tempfile *temp,
struct diff_spec *one)
{
static unsigned char null_sha1[20] = { 0, };
+ int use_work_tree = 0;
if (!one->file_valid) {
not_a_valid_file:
@@ -150,20 +189,22 @@ static void prepare_temp_file(const char
}
if (one->sha1_valid &&
- !memcmp(one->u.sha1, null_sha1, sizeof(null_sha1))) {
- one->sha1_valid = 0;
- one->u.name = name;
- }
+ (!memcmp(one->blob_sha1, null_sha1, sizeof(null_sha1)) ||
+ work_tree_matches(name, one->blob_sha1)))
+ use_work_tree = 1;
- if (!one->sha1_valid) {
+ if (!one->sha1_valid || use_work_tree) {
struct stat st;
- temp->name = one->u.name;
+ temp->name = name;
if (stat(temp->name, &st) < 0) {
if (errno == ENOENT)
goto not_a_valid_file;
die("stat(%s): %s", temp->name, strerror(errno));
}
- strcpy(temp->hex, sha1_to_hex(null_sha1));
+ if (!one->sha1_valid)
+ strcpy(temp->hex, sha1_to_hex(null_sha1));
+ else
+ strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
sprintf(temp->mode, "%06o",
S_IFREG |ce_permissions(st.st_mode));
}
@@ -173,10 +214,10 @@ static void prepare_temp_file(const char
char type[20];
unsigned long size;
- blob = read_sha1_file(one->u.sha1, type, &size);
+ blob = read_sha1_file(one->blob_sha1, type, &size);
if (!blob || strcmp(type, "blob"))
die("unable to read blob object for %s (%s)",
- name, sha1_to_hex(one->u.sha1));
+ name, sha1_to_hex(one->blob_sha1));
strcpy(temp->tmp_path, ".diff_XXXXXX");
fd = mkstemp(temp->tmp_path);
@@ -187,7 +228,7 @@ static void prepare_temp_file(const char
close(fd);
free(blob);
temp->name = temp->tmp_path;
- strcpy(temp->hex, sha1_to_hex(one->u.sha1));
+ strcpy(temp->hex, sha1_to_hex(one->blob_sha1));
temp->hex[40] = 0;
sprintf(temp->mode, "%06o", one->mode);
}
@@ -285,7 +326,7 @@ void diff_addremove(int addremove, unsig
char concatpath[PATH_MAX];
struct diff_spec spec[2], *one, *two;
- memcpy(spec[0].u.sha1, sha1, 20);
+ memcpy(spec[0].blob_sha1, sha1, 20);
spec[0].mode = mode;
spec[0].sha1_valid = spec[0].file_valid = 1;
spec[1].file_valid = 0;
@@ -310,9 +351,9 @@ void diff_change(unsigned old_mode, unsi
char concatpath[PATH_MAX];
struct diff_spec spec[2];
- memcpy(spec[0].u.sha1, old_sha1, 20);
+ memcpy(spec[0].blob_sha1, old_sha1, 20);
spec[0].mode = old_mode;
- memcpy(spec[1].u.sha1, new_sha1, 20);
+ memcpy(spec[1].blob_sha1, new_sha1, 20);
spec[1].mode = new_mode;
spec[0].sha1_valid = spec[0].file_valid = 1;
spec[1].sha1_valid = spec[1].file_valid = 1;
--- a/diff.h
+++ b/diff.h
@@ -20,15 +20,12 @@ extern void diff_unmerge(const char *pat
/* These are for diff-tree-helper */
struct diff_spec {
- union {
- const char *name; /* path on the filesystem */
- unsigned char sha1[20]; /* blob object ID */
- } u;
+ unsigned char blob_sha1[20];
unsigned short mode; /* file mode */
- unsigned sha1_valid : 1; /* if true, use u.sha1 and trust mode.
- * (however with a NULL SHA1, read them
- * from the file!).
- * if false, use u.name and read mode from
+ unsigned sha1_valid : 1; /* if true, use blob_sha1 and trust mode;
+ * however with a NULL SHA1, read them
+ * from the file system.
+ * if false, use the name and read mode from
* the filesystem.
*/
unsigned file_valid : 1; /* if false the file does not even exist */
next reply other threads:[~2005-05-03 22:04 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-05-03 22:10 Junio C Hamano [this message]
-- strict thread matches above, loose matches on Subject: below --
2005-05-04 23:56 [PATCH] Optimize diff-cache -p --cached Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=7vk6mgt0z2.fsf@assigned-by-dhcp.cox.net \
--to=junkio@cox.net \
--cc=git@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox