* Set up for better tree diff optimizations
@ 2007-03-18 22:18 Linus Torvalds
2007-03-21 7:09 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2007-03-18 22:18 UTC (permalink / raw)
To: Junio C Hamano, Git Mailing List
This is mainly just a cleanup patch, and sets up for later changes where
the tree-diff.c "interesting()" function can return more than just a
yes/no value.
In particular, it should be quite possible to say "no subsequent entries
in this tree can possibly be interesting any more", and thus allow the
callers to short-circuit the tree entirely.
In fact, changing the callers to do so is trivial, and is really all this
patch really does, because changing "interesting()" itself to say that
nothing further is going to be interesting is definitely more complicated,
considering that we may have arbitrary pathspecs.
But in cleaning up the callers, this actually fixes a potential small
performance issue in diff_tree(): if the second tree has a lot of
uninterestign crud in it, we would keep on doing the "is it interesting?"
check on the first tree for each uninteresting entry in the second one.
The answer is obviously not going to change, so that was just not helping.
The new code is clearer and simpler and avoids this issue entirely.
I also renamed "interesting()" to "tree_entry_interesting()", because I
got frustrated by the fact that
- we actually had *another* function called "interesting()" in another
file, and I couldn't tell from the profiles which one was the one that
mattered more.
- when rewriting it to return a ternary value, you can't just do
if (interesting(...))
...
any more, but want to assign the return value to a local variable. The
name of choice for that variable would normally be "interesting", so
I just wanted to make the function name be more specific, and avoid
that whole issue (even though I then didn't choose that name for either
of the users, just to avoid confusion in the patch itself ;)
In other words, this doesn't really change anything, but I think it's a
good thing to do, and if somebody comes along and writes the logic for
"yeah, none of the pathspecs you have are interesting", we now support
that trivially.
It could easily be a meaningful optimization for things like "blame",
where there's just one pathspec, and stopping when you've seen it would
allow you to avoid about 50% of the tree traversals on average.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
Junio, your decision.
tree-diff.c | 44 ++++++++++++++++++++++++++++++++++----------
1 files changed, 34 insertions(+), 10 deletions(-)
diff --git a/tree-diff.c b/tree-diff.c
index f89b9d3..2e0a3ae 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -66,7 +66,15 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const
return 0;
}
-static int interesting(struct tree_desc *desc, const char *base, int baselen, struct diff_options *opt)
+/*
+ * Is a tree entry interesting given the pathspec we have?
+ *
+ * Return:
+ * - positive for yes
+ * - zero for no
+ * - negative for "no, and no subsequent entries will be either"
+ */
+static int tree_entry_interesting(struct tree_desc *desc, const char *base, int baselen, struct diff_options *opt)
{
const char *path;
const unsigned char *sha1;
@@ -123,7 +131,10 @@ static int interesting(struct tree_desc *desc, const char *base, int baselen, st
static void show_tree(struct diff_options *opt, const char *prefix, struct tree_desc *desc, const char *base, int baselen)
{
while (desc->size) {
- if (interesting(desc, base, baselen, opt))
+ int show = tree_entry_interesting(desc, base, baselen, opt);
+ if (show < 0)
+ break;
+ if (show)
show_entry(opt, prefix, desc, base, baselen);
update_tree_entry(desc);
}
@@ -158,20 +169,33 @@ static void show_entry(struct diff_options *opt, const char *prefix, struct tree
}
}
+static void skip_uninteresting(struct tree_desc *t, const char *base, int baselen, struct diff_options *opt)
+{
+ while (t->size) {
+ int show = tree_entry_interesting(t, base, baselen, opt);
+ if (!show) {
+ update_tree_entry(t);
+ continue;
+ }
+ /* Skip it all? */
+ if (show < 0)
+ t->size = 0;
+ return;
+ }
+}
+
int diff_tree(struct tree_desc *t1, struct tree_desc *t2, const char *base, struct diff_options *opt)
{
int baselen = strlen(base);
- while (t1->size | t2->size) {
- if (opt->nr_paths && t1->size && !interesting(t1, base, baselen, opt)) {
- update_tree_entry(t1);
- continue;
- }
- if (opt->nr_paths && t2->size && !interesting(t2, base, baselen, opt)) {
- update_tree_entry(t2);
- continue;
+ for (;;) {
+ if (opt->nr_paths) {
+ skip_uninteresting(t1, base, baselen, opt);
+ skip_uninteresting(t2, base, baselen, opt);
}
if (!t1->size) {
+ if (!t2->size)
+ break;
show_entry(opt, "+", t2, base, baselen);
update_tree_entry(t2);
continue;
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: Set up for better tree diff optimizations
2007-03-18 22:18 Set up for better tree diff optimizations Linus Torvalds
@ 2007-03-21 7:09 ` Junio C Hamano
2007-03-21 15:20 ` Linus Torvalds
0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2007-03-21 7:09 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
Linus Torvalds <torvalds@linux-foundation.org> writes:
> This is mainly just a cleanup patch, and sets up for later changes where
> the tree-diff.c "interesting()" function can return more than just a
> yes/no value.
>
> In particular, it should be quite possible to say "no subsequent entries
> in this tree can possibly be interesting any more", and thus allow the
> callers to short-circuit the tree entirely.
>
> In fact, changing the callers to do so is trivial, and is really all this
> patch really does, because changing "interesting()" itself to say that
> nothing further is going to be interesting is definitely more complicated,
> considering that we may have arbitrary pathspecs.
> ...
> Junio, your decision.
Sorry, as I was sick and spent almost all day in bed, I am
somewhat behind going through the mailing list backlog.
> tree-diff.c | 44 ++++++++++++++++++++++++++++++++++----------
> 1 files changed, 34 insertions(+), 10 deletions(-)
>
> diff --git a/tree-diff.c b/tree-diff.c
> index f89b9d3..2e0a3ae 100644
> --- a/tree-diff.c
> +++ b/tree-diff.c
> @@ -66,7 +66,15 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const
> return 0;
> }
>
> -static int interesting(struct tree_desc *desc, const char *base, int baselen, struct diff_options *opt)
> +/*
> + * Is a tree entry interesting given the pathspec we have?
> + *
> + * Return:
> + * - positive for yes
> + * - zero for no
> + * - negative for "no, and no subsequent entries will be either"
> + */
> +static int tree_entry_interesting(struct tree_desc *desc, const char *base, int baselen, struct diff_options *opt)
> {
> const char *path;
> const unsigned char *sha1;
I've already applied the patch, but I do not know how much this
interface would help, as the only easy case the function
tree_entry_interesting() can say "no subsequent entries will be
either" without looking ahead is where no pathspecs match the
base, but that is already prevented by the way you walk the tree
(you do not descend into an uninteresting tree).
If you do:
$ git log arch/i386/ include/asm-i386/
and if the traversal is already in arch/i386/crypto, then it is
unnecessary to check if a path discovered during the traversal
is interesting, as the base ("arch/i386/crypto") for all the
entries in that subdirectory is already covered by one of the
pathspecs ("arch/i386"). It also means checking the other
pathspec "include/asm-i386/" is a waste of time, because that
will not match the base no matter what path is discovered from
the tree object anyway.
I suspect that two possible optimizations are:
(1) instead of, or in addition to, allowing it to say "no
subsequent ones will match", allow it to say "everything
for this base will be interesting, stop bothering to ask me
one path at a time".
(2) mark include/asm-i386/ pathspec to be N/A while we are
already in arch/i386/crypto, as we already know it will
never match. And remove that mark when we come back from
that recursion.
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: Set up for better tree diff optimizations
2007-03-21 7:09 ` Junio C Hamano
@ 2007-03-21 15:20 ` Linus Torvalds
2007-03-21 16:51 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2007-03-21 15:20 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Git Mailing List
On Wed, 21 Mar 2007, Junio C Hamano wrote:
>
> I've already applied the patch, but I do not know how much this
> interface would help, as the only easy case the function
> tree_entry_interesting() can say "no subsequent entries will be
> either" without looking ahead is where no pathspecs match the
> base, but that is already prevented by the way you walk the tree
> (you do not descend into an uninteresting tree).
No. You miss the important case.
> If you do:
>
> $ git log arch/i386/ include/asm-i386/
Right. Forget about the paths we're *interested* in.
Look at the ones we're *not* interested in!
At the top level, what's the tree? It looks like this:
100644 blob 060a71d41ad7ccc3214065a182e6f67568420071 .gitignore
100644 blob bf62dbea88e613d8a22a6f9977289b4eb2eb968b .mailmap
100644 blob ca442d313d86dc67e0a2e5d584b465bd382cbf5c COPYING
100644 blob 6bd8ab86b5bd1b80f2df2ceaf0a4f8d7634639a0 CREDITS
040000 tree 14499497d9d170b046e5cf9aafeec76647ece9a3 Documentation
100644 blob 0451f69353bad4d07de34fd4658f40b805bd467a Kbuild
100644 blob 6d8d5b917d1fe0f4ff011d738ca9d13947521eb8 MAINTAINERS
100644 blob 1c018c468e153ac85079f90a209a9bb3a09c5671 Makefile
100644 blob 159912cf515549455ed5f579b8825d28962bd22e README
100644 blob ac02e42a2627f359575ae0e689afca911a75e0ff REPORTING-BUGS
040000 tree 46262adf3cfb89b4028a2d4beb17d9089e26902f arch
040000 tree 45d8a2724b4414f7fc3a2dac66929fdf0d032986 block
040000 tree 21696299da054c8ad04c5fc87f21ecc4a7d0e464 crypto
040000 tree ff75402a4a48101c95e9dc820c7c822767ca2ac3 drivers
040000 tree 734d24c7dcfbe8d9b03045aebc2148f92c5f4607 fs
040000 tree e3d0731913e852e7f256af5b6a3cd50bea8d4993 include
040000 tree e39426d7179d961d96e27f7250fb16a483cc17db init
040000 tree 0438f8748d21ad402d305bf469b27f59bc1300b9 ipc
040000 tree dc33b0bd31bdf49aaa04f3357f20bb1a189bac4a kernel
040000 tree 7cfb510f6854fe8c51f15fefbedda66773c0f23a lib
040000 tree cfb673c385e328bbdefb60c3ce59c8f3ba7f215e mm
040000 tree 30550e7a0676fe13a2e48d5f3c3872112e9634d2 net
040000 tree 5194f3b70deaa7f98cadfeca300fd140b1086409 scripts
040000 tree 164b706596d177b15750eb0da157758397195885 security
040000 tree e5d9e19873759dcccd45dde4d76a0ad75db8fe5c sound
040000 tree 09109a71d77e211a4253fdde8afec7f119055e3b usr
And with your pathspec, we can tell after we see "init", that we're not
interested in any of the rest. In other words, we could consistently skip
and not even parse almost 50% of the top-level tree.
And if you only have the first path-spec ("arch/i386") you could skip
everything after you've seen "block", because you know that the rest of
the tree simply isn't interesting any more.
Similarly, look at what happens (with your two-entry pathspec) when you
enter "arch". Obviously the "include" thing will be totally uninteresting,
so as far as that one is concerned, you can skip the *whole* thing, but
because we have multiple path-specs we need to skip only when *both*
decide that it's uninteresting. So look at the "arch" sub-tree, and we
have
alpha
arm
arm26
avr32
cris
frv
h8300
i386
ia64
m32r
m68k
m68knommu
mips
parisc
powerpc
ppc
s390
sh
sh64
sparc
sparc64
um
v850
x86_64
xtensa
and again, when we hit ia64, we *could* return -1, because "include" is
totally uninteresting for all of it, and the "arch/i386/" pathspec can
tell that anything below ia64 is uninteresting, because ia64 sorts after
it, and the tree is alphabetically sorted.
So you could skip even parsing half of both the top-level tree _and_ more
than half of the "arch" tree.
Linus
^ permalink raw reply [flat|nested] 8+ messages in thread* Re: Set up for better tree diff optimizations
2007-03-21 15:20 ` Linus Torvalds
@ 2007-03-21 16:51 ` Junio C Hamano
2007-03-21 18:21 ` Linus Torvalds
0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2007-03-21 16:51 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Wed, 21 Mar 2007, Junio C Hamano wrote:
>>
>> I've already applied the patch, but I do not know how much this
>> interface would help, as the only easy case the function
>> tree_entry_interesting() can say "no subsequent entries will be
>> either" without looking ahead is where no pathspecs match the
>> base, but that is already prevented by the way you walk the tree
>> (you do not descend into an uninteresting tree).
>
> No. You miss the important case.
>
>> If you do:
>>
>> $ git log arch/i386/ include/asm-i386/
>
> Right. Forget about the paths we're *interested* in.
>
> Look at the ones we're *not* interested in!
As always, you are right and enlightened others with your
superiour intelligence enough to produce code for you so that
you do not have to write yourself ;-).
Would something like this suit your taste?
The "rev-list org.eclipse.debug.ui/" test that took 16-17
seconds takes 9 seconds with this patch. Running with a
diffrent pathspec "org.apache.ant/" obviously makes it go a lot
faster (15sec vs 7sec).
-- >8 --
[PATCH] Teach tree_entry_interesting() that the tree entries are sorted.
When we are looking at a tree entry with pathspecs, if all the
pathspecs sort strictly earlier than the entry we are currently
looking at, there is no way later entries in the same tree would
match out pathspecs, because the entries are sorted.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
tree-diff.c | 43 +++++++++++++++++++++++++++++++++++++------
1 files changed, 37 insertions(+), 6 deletions(-)
diff --git a/tree-diff.c b/tree-diff.c
index b2f35dc..459a0ff 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -81,6 +81,7 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
unsigned mode;
int i;
int pathlen;
+ int never_interesting = -1;
if (!opt->nr_paths)
return 1;
@@ -89,9 +90,10 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
pathlen = tree_entry_len(path, sha1);
- for (i=0; i < opt->nr_paths; i++) {
+ for (i = 0; i < opt->nr_paths; i++) {
const char *match = opt->paths[i];
int matchlen = opt->pathlens[i];
+ int m;
if (baselen >= matchlen) {
/* If it doesn't match, move along... */
@@ -109,6 +111,32 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
match += baselen;
matchlen -= baselen;
+ /*
+ * Does match sort strictly earlier than path with their
+ * common parts?
+ */
+ m = strncmp(match, path,
+ (matchlen < pathlen) ? matchlen : pathlen);
+ if (m < 0)
+ continue;
+
+ /*
+ * If we come here even once, that means there is at
+ * least one pathspec that would sort later than the
+ * path we are currently looking at (even though this
+ * particular one we are currently looking at might
+ * not match). In other words, if we have never
+ * reached this point after iterating all pathspecs,
+ * it means all pathspecs are either outside of base,
+ * or inside the base but sorts earlier than the
+ * current one. In either case, they will never match
+ * the subsequent entries. In such a case, we
+ * initialized the variable to -1 and that is what
+ * will be returned, allowing the caller to terminate
+ * early.
+ */
+ never_interesting = 0;
+
if (pathlen > matchlen)
continue;
@@ -119,12 +147,15 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
continue;
}
- if (strncmp(path, match, pathlen))
- continue;
-
- return 1;
+ /*
+ * If common part matched earlier then it is a hit,
+ * because we rejected the case where path is not a
+ * leading directory and is shorter than match.
+ */
+ if (!m)
+ return 1;
}
- return 0; /* No matches */
+ return never_interesting; /* No matches */
}
/* A whole sub-tree went away or appeared */
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: Set up for better tree diff optimizations
2007-03-21 16:51 ` Junio C Hamano
@ 2007-03-21 18:21 ` Linus Torvalds
2007-03-21 19:05 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Linus Torvalds @ 2007-03-21 18:21 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Git Mailing List
On Wed, 21 Mar 2007, Junio C Hamano wrote:
>
> As always, you are right and enlightened others with your
> superiour intelligence enough to produce code for you so that
> you do not have to write yourself ;-).
What can I say? It's a gift. I write _just_ enough code so that others get
intrigued and encouraged to do the rest. Then I step in, and take all the
glory.
> Would something like this suit your taste?
This looks fine, although the reason I didn't get it done myself is that I
have this nagging feeling that there must be some clever way to make it
even faster. I hated having to do the "strncmp()" early, when it's not
always needed.
But like your patch, I could never come up with a way to *both* do the
"don't strncmp() if you don't have to" *and* do the "nothing further will
be interesting any more" optimization..
That said, your numbers are pretty convincing:
> The "rev-list org.eclipse.debug.ui/" test that took 16-17
> seconds takes 9 seconds with this patch. Running with a
> diffrent pathspec "org.apache.ant/" obviously makes it go a lot
> faster (15sec vs 7sec).
Yeah, and conversely, a pathspec at the end of the list of paths will make
it less of an optimization. But in general it shouldn't ever be a loss
except for the rather rare case of asking for a path at the end of a tree,
and then the loss should be pretty small (ie it's the result of doing a
few strncmp's that we used to be able to skip).
Considering that the win on _average_ is in the 50% range (and your
numbers back that up), it's definitely worth it.
Linus
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Set up for better tree diff optimizations
2007-03-21 18:21 ` Linus Torvalds
@ 2007-03-21 19:05 ` Junio C Hamano
2007-03-21 19:54 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2007-03-21 19:05 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
Linus Torvalds <torvalds@linux-foundation.org> writes:
> On Wed, 21 Mar 2007, Junio C Hamano wrote:
> ...
>> Would something like this suit your taste?
>
> This looks fine, although the reason I didn't get it done myself is that I
> have this nagging feeling that there must be some clever way to make it
> even faster. I hated having to do the "strncmp()" early, when it's not
> always needed.
>
> But like your patch, I could never come up with a way to *both* do the
> "don't strncmp() if you don't have to" *and* do the "nothing further will
> be interesting any more" optimization..
I briefly tried match[0] < path[0] as a compromise, but it did
not fly well with the eclipse example for obvious reasons ;-).
>> The "rev-list org.eclipse.debug.ui/" test that took 16-17
>> seconds takes 9 seconds with this patch. Running with a
>> diffrent pathspec "org.apache.ant/" obviously makes it go a lot
>> faster (15sec vs 7sec).
>
> Yeah, and conversely, a pathspec at the end of the list of paths will make
> it less of an optimization. But in general it shouldn't ever be a loss
> except for the rather rare case of asking for a path at the end of a tree,
> and then the loss should be pretty small (ie it's the result of doing a
> few strncmp's that we used to be able to skip).
We _could_ check never_interesting first and if it is already
dropped then defer strncmp() and check the pathlen > matchlen
comparison first.
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: Set up for better tree diff optimizations
2007-03-21 19:05 ` Junio C Hamano
@ 2007-03-21 19:54 ` Junio C Hamano
2007-03-22 0:00 ` Junio C Hamano
0 siblings, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2007-03-21 19:54 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
Junio C Hamano <junkio@cox.net> writes:
> Linus Torvalds <torvalds@linux-foundation.org> writes:
>
>> On Wed, 21 Mar 2007, Junio C Hamano wrote:
>> ...
>>> Would something like this suit your taste?
>>
>> This looks fine, although the reason I didn't get it done myself is that I
>> have this nagging feeling that there must be some clever way to make it
>> even faster. I hated having to do the "strncmp()" early, when it's not
>> always needed.
> ...
> We _could_ check never_interesting first and if it is already
> dropped then defer strncmp() and check the pathlen > matchlen
> comparison first.
So this is the round #2, as a replacement patch of what I sent
earlier. Once we know that there are pathspecs that sort later
than the current path, we can defer doing strncmp() and skip
that pathspec early by comparing length. If the path is longer
than the spec, we can tell that it would never match without
comparing.
Best time for "git-rev-list HEAD -- arch/i386/ include/asm-i386/"
are (ten runs each):
without any patch: 1.17user
with the previous: 1.13user
with this patch: 1.11user
-- >8 --
Teach tree_entry_interesting() that the tree entries are sorted.
When we are looking at a tree entry with pathspecs, if all the
pathspecs sort strictly earlier than the entry we are currently
looking at, there is no way later entries in the same tree would
match out pathspecs, because the entries are sorted.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
tree-diff.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++------
1 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/tree-diff.c b/tree-diff.c
index 3678805..15fd665 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -81,6 +81,7 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
unsigned mode;
int i;
int pathlen;
+ int never_interesting = -1;
if (!opt->nr_paths)
return 1;
@@ -89,9 +90,10 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
pathlen = tree_entry_len(path, sha1);
- for (i=0; i < opt->nr_paths; i++) {
+ for (i = 0; i < opt->nr_paths; i++) {
const char *match = opt->paths[i];
int matchlen = opt->pathlens[i];
+ int m = -1; /* signals that we haven't called strncmp() */
if (baselen >= matchlen) {
/* If it doesn't match, move along... */
@@ -109,6 +111,37 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
match += baselen;
matchlen -= baselen;
+ if (never_interesting) {
+ /*
+ * We have not seen any match that sorts later
+ * than the current path.
+ */
+
+ /*
+ * Does match sort strictly earlier than path
+ * with their common parts?
+ */
+ m = strncmp(match, path,
+ (matchlen < pathlen) ? matchlen : pathlen);
+ if (m < 0)
+ continue;
+
+ /*
+ * If we come here even once, that means there is at
+ * least one pathspec that would sort equal to or
+ * later than the path we are currently looking at.
+ * In other words, if we have never reached this point
+ * after iterating all pathspecs, it means all
+ * pathspecs are either outside of base, or inside the
+ * base but sorts strictly earlier than the current
+ * one. In either case, they will never match the
+ * subsequent entries. In such a case, we initialized
+ * the variable to -1 and that is what will be
+ * returned, allowing the caller to terminate early.
+ */
+ never_interesting = 0;
+ }
+
if (pathlen > matchlen)
continue;
@@ -119,12 +152,22 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
continue;
}
- if (strncmp(path, match, pathlen))
- continue;
-
- return 1;
+ if (m == -1)
+ /*
+ * we cheated and did not do strncmp(), so we do
+ * that here.
+ */
+ m = strncmp(match, path, pathlen);
+
+ /*
+ * If common part matched earlier then it is a hit,
+ * because we rejected the case where path is not a
+ * leading directory and is shorter than match.
+ */
+ if (!m)
+ return 1;
}
- return 0; /* No matches */
+ return never_interesting; /* No matches */
}
/* A whole sub-tree went away or appeared */
^ permalink raw reply related [flat|nested] 8+ messages in thread* Re: Set up for better tree diff optimizations
2007-03-21 19:54 ` Junio C Hamano
@ 2007-03-22 0:00 ` Junio C Hamano
0 siblings, 0 replies; 8+ messages in thread
From: Junio C Hamano @ 2007-03-22 0:00 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List
Junio C Hamano <junkio@cox.net> writes:
> So this is the round #2, as a replacement patch of what I sent
> earlier. Once we know that there are pathspecs that sort later
> than the current path, we can defer doing strncmp() and skip
> that pathspec early by comparing length. If the path is longer
> than the spec, we can tell that it would never match without
> comparing.
And then this comes on top of #2.
-- >8 --
tree_entry_interesting(): allow it to say "everything is interesting"
In addition to optimizing pathspecs that would never match which
was done earlier, this optimizes pathspecs that would always
match (e.g. "arch/" while the traversal is already in
"arch/i386/" hierarchy).
With this, the worst case become slightly more palatable, while
improving average case.
Signed-off-by: Junio C Hamano <junkio@cox.net>
---
For example, best-of-ten user times of
"rev-list HEAD -- $pathspec" in the eclipse repository for
various pathspecs are:
* .cvsignore (best path, earliest in the tree)
11.84 (without any optimization)
6.35 (with patch #2)
6.35 (with patch #2 and this patch)
* platform-launcher (worst path, last in the tree)
12.20 (without any optimization)
12.90 (with patch #2)
12.75 (with patch #2 and this patch)
* org.eclipse.platform.macosx.carbon-feature (middle in the tree)
12.02 (without any optimization)
10.52 (with patch #2)
10.43 (with patch #2 and this patch)
The kernel archive number is similar. Limited to arch/i386 and
include/asm-i386, the numbers are:
1.18 (without any optimization)
1.11 (with patch #2)
1.10 (with patch #2 and this patch).
tree-diff.c | 33 ++++++++++++++++++++++++++++-----
1 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/tree-diff.c b/tree-diff.c
index 15fd665..852498e 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -70,7 +70,8 @@ static int compare_tree_entry(struct tree_desc *t1, struct tree_desc *t2, const
* Is a tree entry interesting given the pathspec we have?
*
* Return:
- * - positive for yes
+ * - 2 for "yes, and all subsequent entries will be"
+ * - 1 for yes
* - zero for no
* - negative for "no, and no subsequent entries will be either"
*/
@@ -100,8 +101,11 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
if (strncmp(base, match, matchlen))
continue;
- /* The base is a subdirectory of a path which was specified. */
- return 1;
+ /*
+ * The base is a subdirectory of a path which
+ * was specified, so all of them are interesting.
+ */
+ return 2;
}
/* Does the base match? */
@@ -173,8 +177,18 @@ static int tree_entry_interesting(struct tree_desc *desc, const char *base, int
/* A whole sub-tree went away or appeared */
static void show_tree(struct diff_options *opt, const char *prefix, struct tree_desc *desc, const char *base, int baselen)
{
+ int all_interesting = 0;
while (desc->size) {
- int show = tree_entry_interesting(desc, base, baselen, opt);
+ int show;
+
+ if (all_interesting)
+ show = 1;
+ else {
+ show = tree_entry_interesting(desc, base, baselen,
+ opt);
+ if (show == 2)
+ all_interesting = 1;
+ }
if (show < 0)
break;
if (show)
@@ -215,8 +229,17 @@ static void show_entry(struct diff_options *opt, const char *prefix, struct tree
static void skip_uninteresting(struct tree_desc *t, const char *base, int baselen, struct diff_options *opt)
{
+ int all_interesting = 0;
while (t->size) {
- int show = tree_entry_interesting(t, base, baselen, opt);
+ int show;
+
+ if (all_interesting)
+ show = 1;
+ else {
+ show = tree_entry_interesting(t, base, baselen, opt);
+ if (show == 2)
+ all_interesting = 1;
+ }
if (!show) {
update_tree_entry(t);
continue;
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2007-03-22 0:01 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-18 22:18 Set up for better tree diff optimizations Linus Torvalds
2007-03-21 7:09 ` Junio C Hamano
2007-03-21 15:20 ` Linus Torvalds
2007-03-21 16:51 ` Junio C Hamano
2007-03-21 18:21 ` Linus Torvalds
2007-03-21 19:05 ` Junio C Hamano
2007-03-21 19:54 ` Junio C Hamano
2007-03-22 0:00 ` Junio C Hamano
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).