* Re: git-rev-list: add "--dense" flag @ 2005-10-23 19:30 Marco Costalba 0 siblings, 0 replies; 15+ messages in thread From: Marco Costalba @ 2005-10-23 19:30 UTC (permalink / raw) To: Linus Torvalds; +Cc: Git Mailing List Linus Torvalds wrote: > >And it scales pretty well too. On the historical linux archive, which is >three years of history, the same thing takes me just over 12 seconds and >52MB, and that's for the _whole_ history. And it's not just following one >file: it's following that subdirectory. > >So it really is pretty damn cool. > Yes, it is. Very powerful and useful tool indeed. IMHO kudos! >Of course, I might have a bug somewhere, but it all _seems_ to work very >well indeed. > The only bug I found is in qgit ;-) that failed to correctly handle --dense option. It is now fixed in my GIT archive: http://digilander.libero.it/mcostalba/qgit.git Marco __________________________________ Yahoo! FareChase: Search multiple travel sites in one click. http://farechase.yahoo.com ^ permalink raw reply [flat|nested] 15+ messages in thread
* git-rev-list: add "--dense" flag
@ 2005-10-21 23:40 Linus Torvalds
2005-10-21 23:47 ` Linus Torvalds
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: Linus Torvalds @ 2005-10-21 23:40 UTC (permalink / raw)
To: Junio C Hamano, Git Mailing List
This is what the recent git-rev-list changes have all been gearing up for.
When we use a path filter to git-rev-list, the new "--dense" flag asks
git-rev-list to compress the history so that it _only_ contains commits
that change files in the path filter. It also rewrites the parent
information so that tools like "gitk" will see the result as a dense
history tree.
For example, on the current kernel archive:
[torvalds@g5 linux]$ git-rev-list HEAD | wc -l
9904
[torvalds@g5 linux]$ git-rev-list HEAD -- kernel | wc -l
5442
[torvalds@g5 linux]$ git-rev-list --dense HEAD -- kernel | wc -l
356
which shows that while we have almost ten thousand commits, we can prune
down the work to slightly more than half by only following the merges
that are interesting. But further, we can then compress the history to
just 356 entries that actually make changes to the kernel subdirectory.
To see this in action, try something like
gitk --dense -- gitk
to see just the history that affects gitk. Or, to show that true
parallell development still remains parallell, do
gitk --dense -- daemon.c
which shows some parallell commits in the current git tree.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
I'm really happy with how this turned out. It's a bit expensive to run on
big archives, but I _really_ think it's quite spectacular. And likely very
useful indeed.
For example, say you love gitk, but only care about networking changes.
Easy enough, just do
gitk --dense -- net/ include/net/
and off you go. It's not free (we do a _lot_ of tree comparisons), but
dammit, it's fast enough that it's very very useful. The tree comparisons
are done very efficiently.
This is _way_ more powerful than annotate. Interested in just a single
file? Just do
gitk --dense -- kernel/exit.c
and it will show the 17 or so commits that change kernel/exit.c with the
right history (it turns out that there is no parallell development at all
in that file, so in this case it will linearize history entirely).
Damn, I'm good.
diff --git a/rev-list.c b/rev-list.c
index b5dbb9f..5f125fd 100644
--- a/rev-list.c
+++ b/rev-list.c
@@ -11,6 +11,7 @@
#define INTERESTING (1u << 1)
#define COUNTED (1u << 2)
#define SHOWN (1u << 3)
+#define TREECHANGE (1u << 4)
static const char rev_list_usage[] =
"git-rev-list [OPTION] commit-id <commit-id>\n"
@@ -27,6 +28,7 @@ static const char rev_list_usage[] =
" --merge-order [ --show-breaks ]\n"
" --topo-order";
+static int dense = 0;
static int unpacked = 0;
static int bisect_list = 0;
static int tag_objects = 0;
@@ -79,6 +81,26 @@ static void show_commit(struct commit *c
fflush(stdout);
}
+static void rewrite_one(struct commit **pp)
+{
+ for (;;) {
+ struct commit *p = *pp;
+ if (p->object.flags & (TREECHANGE | UNINTERESTING))
+ return;
+ /* Only single-parent commits don't have TREECHANGE */
+ *pp = p->parents->item;
+ }
+}
+
+static void rewrite_parents(struct commit *commit)
+{
+ struct commit_list *parent = commit->parents;
+ while (parent) {
+ rewrite_one(&parent->item);
+ parent = parent->next;
+ }
+}
+
static int filter_commit(struct commit * commit)
{
if (stop_traversal && (commit->object.flags & BOUNDARY))
@@ -95,6 +117,11 @@ static int filter_commit(struct commit *
return STOP;
if (no_merges && (commit->parents && commit->parents->next))
return CONTINUE;
+ if (paths && dense) {
+ if (!(commit->object.flags & TREECHANGE))
+ return CONTINUE;
+ rewrite_parents(commit);
+ }
return DO;
}
@@ -404,6 +431,14 @@ static struct diff_options diff_opt = {
.change = file_change,
};
+static int same_tree(struct tree *t1, struct tree *t2)
+{
+ is_different = 0;
+ if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
+ return 0;
+ return !is_different;
+}
+
static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent)
{
if (!commit->tree)
@@ -415,11 +450,7 @@ static struct commit *try_to_simplify_me
parse_commit(p);
if (!p->tree)
continue;
- is_different = 0;
- if (diff_tree_sha1(commit->tree->object.sha1,
- p->tree->object.sha1, "", &diff_opt) < 0)
- continue;
- if (!is_different)
+ if (same_tree(commit->tree, p->tree))
return p;
}
return NULL;
@@ -485,6 +516,27 @@ static void add_parents_to_list(struct c
}
}
+static void compress_list(struct commit_list *list)
+{
+ while (list) {
+ struct commit *commit = list->item;
+ struct commit_list *parent = commit->parents;
+ list = list->next;
+
+ /*
+ * Exactly one parent? Check if it leaves the tree
+ * unchanged
+ */
+ if (parent && !parent->next) {
+ struct tree *t1 = commit->tree;
+ struct tree *t2 = parent->item->tree;
+ if (!t1 || !t2 || same_tree(t1, t2))
+ continue;
+ }
+ commit->object.flags |= TREECHANGE;
+ }
+}
+
static struct commit_list *limit_list(struct commit_list *list)
{
struct commit_list *newlist = NULL;
@@ -514,6 +566,8 @@ static struct commit_list *limit_list(st
}
if (tree_objects)
mark_edges_uninteresting(newlist);
+ if (paths && dense)
+ compress_list(newlist);
if (bisect_list)
newlist = find_bisection(newlist);
return newlist;
@@ -700,6 +754,10 @@ int main(int argc, const char **argv)
limited = 1;
continue;
}
+ if (!strcmp(arg, "--dense")) {
+ dense = 1;
+ continue;
+ }
if (!strcmp(arg, "--")) {
paths = get_pathspec(prefix, argv + i + 1);
if (paths) {
^ permalink raw reply related [flat|nested] 15+ messages in thread* Re: git-rev-list: add "--dense" flag 2005-10-21 23:40 Linus Torvalds @ 2005-10-21 23:47 ` Linus Torvalds 2005-10-21 23:55 ` Linus Torvalds ` (2 subsequent siblings) 3 siblings, 0 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-21 23:47 UTC (permalink / raw) To: Junio C Hamano, Git Mailing List On Fri, 21 Oct 2005, Linus Torvalds wrote: > > parallell Oh, please also fix my spelling. That's one 'l' too many at the end. Damn, I think I'm a reasonably good speller most of the time, but this time it shows that Swedish is my first language (where it really _is_ written that way, with four 'l's total). Linus ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-21 23:40 Linus Torvalds 2005-10-21 23:47 ` Linus Torvalds @ 2005-10-21 23:55 ` Linus Torvalds 2005-10-22 0:37 ` Petr Baudis 2005-10-25 18:07 ` Jonas Fonseca 3 siblings, 0 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-21 23:55 UTC (permalink / raw) To: Junio C Hamano, Git Mailing List On Fri, 21 Oct 2005, Linus Torvalds wrote: > > [torvalds@g5 linux]$ git-rev-list --dense HEAD -- kernel | wc -l > 356 Btw, one additional point. This took 0.91 seconds to complete on the current kernel history on my machine with a pretty much fully packed archive, and the memory footprint was a total of about 12MB. And it scales pretty well too. On the historical linux archive, which is three years of history, the same thing takes me just over 12 seconds and 52MB, and that's for the _whole_ history. And it's not just following one file: it's following that subdirectory. So it really is pretty damn cool. Of course, I might have a bug somewhere, but it all _seems_ to work very well indeed. Linus ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-21 23:40 Linus Torvalds 2005-10-21 23:47 ` Linus Torvalds 2005-10-21 23:55 ` Linus Torvalds @ 2005-10-22 0:37 ` Petr Baudis 2005-10-22 1:26 ` Linus Torvalds 2005-10-25 18:07 ` Jonas Fonseca 3 siblings, 1 reply; 15+ messages in thread From: Petr Baudis @ 2005-10-22 0:37 UTC (permalink / raw) To: Linus Torvalds; +Cc: Junio C Hamano, Git Mailing List Dear diary, on Sat, Oct 22, 2005 at 01:40:54AM CEST, I got a letter where Linus Torvalds <torvalds@osdl.org> told me that... > When we use a path filter to git-rev-list, the new "--dense" flag asks > git-rev-list to compress the history so that it _only_ contains commits > that change files in the path filter. It also rewrites the parent > information so that tools like "gitk" will see the result as a dense > history tree. There is no documentation for the --dense flag and it is even missing from the usage string. But my main concern is - will it be possible to do the rename detection here as well? Using --dense instead of explicit diff-tree calls in cg-log would be nice optimization, but I was about to add support for optional following of renames for cg-log <filename>. That's really pretty useful to have, every time I hit the Junio's big scripts rename I keep repeating that to myself. ;-) Now when core GIT got the comfort of per-file history, I only hope that it will start to annoy you as well. -- Petr "Pasky" Baudis Stuff: http://pasky.or.cz/ VI has two modes: the one in which it beeps and the one in which it doesn't. ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-22 0:37 ` Petr Baudis @ 2005-10-22 1:26 ` Linus Torvalds 2005-10-22 2:56 ` Petr Baudis 2005-10-22 3:55 ` Junio C Hamano 0 siblings, 2 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-22 1:26 UTC (permalink / raw) To: Petr Baudis; +Cc: Junio C Hamano, Git Mailing List On Sat, 22 Oct 2005, Petr Baudis wrote: > > There is no documentation for the --dense flag and it is even missing > from the usage string. I'm not much for docs ;) The whole path argument also isn't even there (and without paths, --dense doesn't matter) > But my main concern is - will it be possible to do the rename detection > here as well? Yes. Note that git-rev-list doesn't actually _do_ the diff, it only checks whether the tree is changed. You still just get a list of commits out of it, and it is up to you to decide what to do with it. If you're just going to feed them to diff-tree _anyway_, then you might as well not even do the dense thing, because quite frankly, you're just doing extra work. But let's say that you want to follow a certain filename, what you can do is basically (fake shell syntax with "goto restart") rev=HEAD restart: git-rev-list $rev --dense --parents -- "$filename" | while read commit parent1 restofparents do if [ "$restofparents" ]; then .. it's a merge, do whatever it is you do with merges .. else shaold=$(git-ls-tree $parent -- "$filename") shanew=$(git-ls-tree $commit -- "$filename") # Did it disappear? # Maybe it got renamed from something if [ -z "$shaold" ]; then old=$(git-diff-tree -M -r $commit | grep " R .* $filename") echo "rename? $old" filename=figure-it-out-from-$old rev=$parent goto restart fi git-diff-tree -p $commit -- "$filename" fi while or something similar. In other words: you'll basically have to figure out the renames on your own, and follow the renaming, but something like the above should do it. Linus ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-22 1:26 ` Linus Torvalds @ 2005-10-22 2:56 ` Petr Baudis 2005-10-22 3:30 ` Linus Torvalds 2005-10-22 3:55 ` Junio C Hamano 1 sibling, 1 reply; 15+ messages in thread From: Petr Baudis @ 2005-10-22 2:56 UTC (permalink / raw) To: Linus Torvalds; +Cc: Junio C Hamano, Git Mailing List Dear diary, on Sat, Oct 22, 2005 at 03:26:27AM CEST, I got a letter where Linus Torvalds <torvalds@osdl.org> told me that... > On Sat, 22 Oct 2005, Petr Baudis wrote: > > There is no documentation for the --dense flag and it is even missing > > from the usage string. > > I'm not much for docs ;) > > The whole path argument also isn't even there (and without paths, --dense > doesn't matter) Then it should be added to the docs as well. ;-) Oh well, I guess I can always send a patch when yours will get applied. :-) > > But my main concern is - will it be possible to do the rename detection > > here as well? > > Yes. Note that git-rev-list doesn't actually _do_ the diff, it only checks > whether the tree is changed. You still just get a list of commits out of > it, and it is up to you to decide what to do with it. Ok, fair enough. > If you're just going to feed them to diff-tree _anyway_, then you might as > well not even do the dense thing, because quite frankly, you're just doing > extra work. fork() is awfully expensive for me. fork()ing something (supposedly) trivial (it was stat) slowed my (non-trivial) loop about 4 times. So I think it will still pay off hugely if I will be able do the diff-tree only on the interesting commits. -- Petr "Pasky" Baudis Stuff: http://pasky.or.cz/ VI has two modes: the one in which it beeps and the one in which it doesn't. ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-22 2:56 ` Petr Baudis @ 2005-10-22 3:30 ` Linus Torvalds 0 siblings, 0 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-22 3:30 UTC (permalink / raw) To: Petr Baudis; +Cc: Junio C Hamano, Git Mailing List On Sat, 22 Oct 2005, Petr Baudis wrote: > > > If you're just going to feed them to diff-tree _anyway_, then you might as > > well not even do the dense thing, because quite frankly, you're just doing > > extra work. > > fork() is awfully expensive for me. fork()ing something (supposedly) > trivial (it was stat) slowed my (non-trivial) loop about 4 times. Not fork. More like git-rev-list .. | git-diff-tree --stdin If you do it that way (which is quite common - git-whatchanged, git-log etc), there's no reason to ask for a dense output, because git-diff-tree can handle the non-dense one as fast as git-rev-list can. But the --dense option is wonderful for "gitk", and for "slow stuff". Where "slow" can indeed be a fork, or just an interpreter loop like a shell/perl script, which would fork/exec git-diff-tree for each commit. >From a commit-list standpoint git-rev-list --dense ... -- paths is the same as git-rev-list -- paths | git-diff-tree -s -r --stdin -- paths but it's somewhat more efficient, and more importantly, it rewrites the parents. So quite often, you'd want to use "--dense" together with "--parent" so that you get the "rewritten" history. That's how come gitk didn't need any changes, and it "just worked". It's also how my example script did the rename detection without having to traverse unnecessary commits. Linus ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-22 1:26 ` Linus Torvalds 2005-10-22 2:56 ` Petr Baudis @ 2005-10-22 3:55 ` Junio C Hamano 2005-10-22 20:37 ` Linus Torvalds 1 sibling, 1 reply; 15+ messages in thread From: Junio C Hamano @ 2005-10-22 3:55 UTC (permalink / raw) To: Linus Torvalds; +Cc: Petr Baudis, Git Mailing List Linus Torvalds <torvalds@osdl.org> writes: > Yes. Note that git-rev-list doesn't actually _do_ the diff, it only checks > whether the tree is changed. You still just get a list of commits out of > it, and it is up to you to decide what to do with it. > > If you're just going to feed them to diff-tree _anyway_, then you might as > well not even do the dense thing, because quite frankly, you're just doing > extra work. > > But let's say that you want to follow a certain filename, what you can do > is basically (fake shell syntax with "goto restart") >... > or something similar. Isn't that only true because you are not doing more than "have these paths change" in the new rev-list that already has part of diff-tree? If rev-list can optionally be told to detect renames internally (it has necessary bits after all), it could adjust the set of paths to follow when it sees something got renamed, either by replacing the original path given from the command line with its previous name, or adding its previous name to the set of path limitters (to cover the copy case as well). ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-22 3:55 ` Junio C Hamano @ 2005-10-22 20:37 ` Linus Torvalds 0 siblings, 0 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-22 20:37 UTC (permalink / raw) To: Junio C Hamano; +Cc: Petr Baudis, Git Mailing List On Fri, 21 Oct 2005, Junio C Hamano wrote: > > If rev-list can optionally be told to detect renames internally > (it has necessary bits after all), it could adjust the set of > paths to follow when it sees something got renamed, either by > replacing the original path given from the command line with its > previous name, or adding its previous name to the set of path > limitters (to cover the copy case as well). The problem with renames isn't the straight-line case. The problem with renames is the merge case. And quite frankly, I don't know how to handle that sanely. If everything was straight-line (CVS), renames would be trivial. But git-rev-list very fundamentally works on something that isn't. So let's look at the _fundamental_ problem: - git-rev-list traverses one commit at a time, and it doesn't even _know_ whether it has seem all the parents of that commit during the first phase. The first phase is the "limit_list()" thing, which is when we decide which commits are reachable, uninteresting, and which merges to follow. Now, think about this: since we don't even know that we've seen all parents, that pretty much by definition means that we can't know what has been renamed if we were to track it. - in the second phase (which is where I do the densification), we do actually have the full tree and that makes a lot of things a lot easier to do. When it comes to dense, for example, it means that I know all the UNINTERESTING logic has already been done, and that all merges have been simplified. But in the second phase, we couldn't do rename detection either, since by then, we've already fixed the list of names as far as merges are concerned. And note that this fundamental issue is true _whether_ we have some explicit rename information in a commit or not. Git-rev-list has a few additional issues that make it even more interesting: - git-rev-list fundamentally is designed for multiple heads. There is no one special "origin" head. We might not have _one_ end-point, we might have twenty-five different heads we're looking at at the same time. Try gitk --all --dense -d -- git-fetch.c on the current git archive, and be impressed by how well it works (and yes, you'll see a funny artifact: "--dense" never removes a line of development entirely, so you'll see a single dot for the two "unrelated" lines: "gitk" and "to-do" do not share a root with the rest of them. You'll get a single dot for that root, even if it obviously doesn't actually change "git-fetch.c"). You'll also very clearly see the "Big tool rename" which created that name: it will be the first commit after the root that is visible that way. - path limiters are fundamentally designed to take a list of paths and directories. Personally, I find that to be a lot more important than a single file. That's very efficient the way we do it, but if you were to _change_ the list of paths, you'd basically have to track those changes over history. (This actually happens even with a single path - imagine a merge, and a rename down one line of the merge. You'll have to keep track of different files for the different lines of development, and then when they join together, and the rename only happened in one of them, you'll have to make an executive decision, or just continue to track _both_) So what do I propose? I propose that you realize that "git-rev-list" is just the _building_ block. It does one thing, and one thing only: it creates revision list. A user basically _never_ uses git-rev-list directly: it just doesn't make sense. It's not what git-rev-list is there for. git-rev-list is meant to be used as a base for doing the real work. And that's how you can use it for renaming too. If you think of "git-rev-list --dense -- name" as a fast way to get a set of commits that affect "name", suddenly it all makes sense. You suddenly realize that that's a nice building block for figuring out renames. It's not _all_ of it, but it's a big portion. To go back to "gitk", let's see what the path limitation shows us. Right now, doing a gitk --all --dense -d -- git-fetch.sh only shows that particular name, and that's by design. Maybe that's what the user wants? You have to realize that especially if you remember an _old_ name that may not even exist any more, that's REALLY what you want. Something that works more like "annotate" is useless, because something that works like annotate would just say "I don't have that file, I can't follow renames" and exit. So the first lesson to learn is that following just pure path-names is actually meaningful ON ITS OWN! Sometimes you do NOT want to follow renames. For example, let's say that you used to work on git 4 months ago, but gave up, since it was too rough for you. But you played around with it, and now you're back, and you have an old patch that you want to re-apply, but it doesn't talk about "git-fetch.sh", it talks about "git-fetch-script". So you do gitk --dense -- git-fetch-script and voila, it does the right thing, and top of the thing is "Big tool rename", which tells you _exactly_ what happened to that PATHNAME. See? Static pathnames are important! Now, this does show that when you _do_ care about renames, "gitk" right now doesn't help you very much (no offense to gitk - how could it know that git-rev-list would give it pathname detection one day?). Let's go back to the original example, and see what we could do to make gitk more useful.. gitk --all --dense -d -- git-fetch.sh Go and select that "Big tool rename" thing, and start looking for the rename.. You won't find it. Why? You'll see all-new files, no renames. It turns out that "gitk" follows the "parent" thing a bit _too_ slavishly, which is correct on one level, but in this case with "--dense" it turns out that what you want to do is see only what _that_ commit does, not what that commit did relative to its parent (which was the initial revision). So while "--dense" made gitk work even without any changes, it's clear that the new capability means that gitk might want to have a new button: "show diff against 'fake parent'" and "show diff against 'real parent'". If you want the global view, the default gitk behaviour is correct (it will show a "valid" diff - you'll see everything that changed between the points it shows). But in a rename-centric world, you want the _local_ change to that commit, and right now gitk can't show you that. So for trackign renames, we probably want that as a helper to gitk. Also, gitk has a "re-read references" button, but if you track renames, you probably want to do more than re-read them: you want to re-DO them with the "Big rename" as the new head (forget the old references entirely), and with the name list changed. New functionality (possibly you'd like to havea "New wiew" button, which actually starts a new gitk so that you can see both of them). Right now you'd have to do it by hand: gitk --dense 215a7ad1ef790467a4cd3f0dcffbd6e5f04c38f7 -- git-fetch-script (where 215a.. is the thing you get when you select the "Big tool rename") You'd also probably like to have some way to limit the names shown for the git-diff-tree in gitk. In short, the new "gitk --dense -- filename" doesn't help you nearly as much as it _could_. But when you squint a bit, I think you'll see how it's quite possible to do... Linus ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-21 23:40 Linus Torvalds ` (2 preceding siblings ...) 2005-10-22 0:37 ` Petr Baudis @ 2005-10-25 18:07 ` Jonas Fonseca 2005-10-25 18:23 ` Linus Torvalds 3 siblings, 1 reply; 15+ messages in thread From: Jonas Fonseca @ 2005-10-25 18:07 UTC (permalink / raw) To: Linus Torvalds; +Cc: Junio C Hamano, Git Mailing List Linus Torvalds <torvalds@osdl.org> wrote Fri, Oct 21, 2005: > To see this in action, try something like > > gitk --dense -- gitk > > to see just the history that affects gitk. Is the initial commit supposed to be listed when the file has been added later? I would expect it to only list until (and including) the commit where the file was introduced. prompt > git-rev-list --dense HEAD -- compat/mmap.c f48000fcbe1009c18f1cc46e56cde2cb632071fa 730d48a2ef88a7fb7aa4409d40b1e6964a93267f e83c5163316f89bfbde7d9ab23ca2e25604af290 prompt > git-cat-file commit e83c tree 2b5bfdf7798569e0b59b16eb9602d5fa572d6038 author Linus Torvalds <torvalds@ppc970.osdl.org> 1112911993 -0700 committer Linus Torvalds <torvalds@ppc970.osdl.org> 1112911993 -0700 Initial revision of "git", the information manager from hell Interestingly, for the special gitk, the correct commit is the last rev listed. -- Jonas Fonseca ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-25 18:07 ` Jonas Fonseca @ 2005-10-25 18:23 ` Linus Torvalds 2005-10-25 18:40 ` Jonas Fonseca 2005-10-25 18:50 ` Linus Torvalds 0 siblings, 2 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-25 18:23 UTC (permalink / raw) To: Jonas Fonseca; +Cc: Junio C Hamano, Git Mailing List On Tue, 25 Oct 2005, Jonas Fonseca wrote: > > Is the initial commit supposed to be listed when the file has been added > later? Right now --dense will _always_ show the root commit. I didn't do the logic that does the diff against an empty tree. I was lazy. This patch does that, and may or may not work. Does this match what you expected? Linus ---- diff --git a/rev-list.c b/rev-list.c index 5f125fd..038dae2 100644 --- a/rev-list.c +++ b/rev-list.c @@ -439,6 +439,30 @@ static int same_tree(struct tree *t1, st return !is_different; } +static int same_tree_as_empty(struct tree *t1) +{ + int retval; + void *tree; + struct tree_desc empty, real; + + if (!t1) + return 0; + + tree = read_object_with_reference(t1->object.sha1, "tree", &real.size, NULL); + if (!tree) + return 0; + real.buf = tree; + + empty.buf = ""; + empty.size = 0; + + is_different = 0; + retval = diff_tree(&empty, &real, "", &diff_opt); + free(tree); + + return !retval && !is_different; +} + static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent) { if (!commit->tree) @@ -523,11 +547,17 @@ static void compress_list(struct commit_ struct commit_list *parent = commit->parents; list = list->next; + if (!parent) { + if (!same_tree_as_empty(commit->tree)) + commit->object.flags |= TREECHANGE; + continue; + } + /* * Exactly one parent? Check if it leaves the tree * unchanged */ - if (parent && !parent->next) { + if (!parent->next) { struct tree *t1 = commit->tree; struct tree *t2 = parent->item->tree; if (!t1 || !t2 || same_tree(t1, t2)) ^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-25 18:23 ` Linus Torvalds @ 2005-10-25 18:40 ` Jonas Fonseca 2005-10-25 18:52 ` Linus Torvalds 2005-10-25 18:50 ` Linus Torvalds 1 sibling, 1 reply; 15+ messages in thread From: Jonas Fonseca @ 2005-10-25 18:40 UTC (permalink / raw) To: Linus Torvalds; +Cc: Junio C Hamano, Git Mailing List Linus Torvalds <torvalds@osdl.org> wrote Tue, Oct 25, 2005: > On Tue, 25 Oct 2005, Jonas Fonseca wrote: > > > > Is the initial commit supposed to be listed when the file has been added > > later? > > Right now --dense will _always_ show the root commit. I didn't do the > logic that does the diff against an empty tree. I was lazy. > > This patch does that, and may or may not work. Without the workaround below it segfaults. > Does this match what you expected? Yes, thanks. diff --git a/rev-list.c b/rev-list.c index 5f125fd..82ec656 100644 --- a/rev-list.c +++ b/rev-list.c @@ -87,6 +87,7 @@ static void rewrite_one(struct commit ** struct commit *p = *pp; if (p->object.flags & (TREECHANGE | UNINTERESTING)) return; + if (!p->parents) return; /* Only single-parent commits don't have TREECHANGE */ *pp = p->parents->item; } -- Jonas Fonseca ^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-25 18:40 ` Jonas Fonseca @ 2005-10-25 18:52 ` Linus Torvalds 0 siblings, 0 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-25 18:52 UTC (permalink / raw) To: Jonas Fonseca; +Cc: Junio C Hamano, Git Mailing List On Tue, 25 Oct 2005, Jonas Fonseca wrote: > > Without the workaround below it segfaults. Yes, but your patch only partly helps. It fixes the SIGSEGV, but it still needs to remove the "parents" pointer when the parent ends up NULL. The patch I just sent out should be better. I think. Linus ^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: git-rev-list: add "--dense" flag 2005-10-25 18:23 ` Linus Torvalds 2005-10-25 18:40 ` Jonas Fonseca @ 2005-10-25 18:50 ` Linus Torvalds 1 sibling, 0 replies; 15+ messages in thread From: Linus Torvalds @ 2005-10-25 18:50 UTC (permalink / raw) To: Jonas Fonseca; +Cc: Junio C Hamano, Git Mailing List On Tue, 25 Oct 2005, Linus Torvalds wrote: > > Right now --dense will _always_ show the root commit. I didn't do the > logic that does the diff against an empty tree. I was lazy. > > This patch does that, and may or may not work. Never mind. It's incorrect. It's -close- to being correct, but it will SIGSEGV because now the root won't necessarily have the TREECHANGED flag, so now it can follow the root down to its parents (which is a NULL pointer - that's the definition of a root commit, of course). It needs to remove the parent pointers that become NULL. This patch is even slightly tested, and might do a better job. [ Sorry for sending untested crap out, but I have a fairly good track record in general. Testing is for wimps ] Linus --- diff --git a/rev-list.c b/rev-list.c index 5f125fd..edf3b37 100644 --- a/rev-list.c +++ b/rev-list.c @@ -81,23 +81,28 @@ static void show_commit(struct commit *c fflush(stdout); } -static void rewrite_one(struct commit **pp) +static int rewrite_one(struct commit **pp) { for (;;) { struct commit *p = *pp; if (p->object.flags & (TREECHANGE | UNINTERESTING)) - return; - /* Only single-parent commits don't have TREECHANGE */ + return 0; + if (!p->parents) + return -1; *pp = p->parents->item; } } static void rewrite_parents(struct commit *commit) { - struct commit_list *parent = commit->parents; - while (parent) { - rewrite_one(&parent->item); - parent = parent->next; + struct commit_list **pp = &commit->parents; + while (*pp) { + struct commit_list *parent = *pp; + if (rewrite_one(&parent->item) < 0) { + *pp = parent->next; + continue; + } + pp = &parent->next; } } @@ -439,6 +444,30 @@ static int same_tree(struct tree *t1, st return !is_different; } +static int same_tree_as_empty(struct tree *t1) +{ + int retval; + void *tree; + struct tree_desc empty, real; + + if (!t1) + return 0; + + tree = read_object_with_reference(t1->object.sha1, "tree", &real.size, NULL); + if (!tree) + return 0; + real.buf = tree; + + empty.buf = ""; + empty.size = 0; + + is_different = 0; + retval = diff_tree(&empty, &real, "", &diff_opt); + free(tree); + + return retval >= 0 && !is_different; +} + static struct commit *try_to_simplify_merge(struct commit *commit, struct commit_list *parent) { if (!commit->tree) @@ -523,11 +552,17 @@ static void compress_list(struct commit_ struct commit_list *parent = commit->parents; list = list->next; + if (!parent) { + if (!same_tree_as_empty(commit->tree)) + commit->object.flags |= TREECHANGE; + continue; + } + /* * Exactly one parent? Check if it leaves the tree * unchanged */ - if (parent && !parent->next) { + if (!parent->next) { struct tree *t1 = commit->tree; struct tree *t2 = parent->item->tree; if (!t1 || !t2 || same_tree(t1, t2)) ^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2005-10-25 18:52 UTC | newest] Thread overview: 15+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2005-10-23 19:30 git-rev-list: add "--dense" flag Marco Costalba -- strict thread matches above, loose matches on Subject: below -- 2005-10-21 23:40 Linus Torvalds 2005-10-21 23:47 ` Linus Torvalds 2005-10-21 23:55 ` Linus Torvalds 2005-10-22 0:37 ` Petr Baudis 2005-10-22 1:26 ` Linus Torvalds 2005-10-22 2:56 ` Petr Baudis 2005-10-22 3:30 ` Linus Torvalds 2005-10-22 3:55 ` Junio C Hamano 2005-10-22 20:37 ` Linus Torvalds 2005-10-25 18:07 ` Jonas Fonseca 2005-10-25 18:23 ` Linus Torvalds 2005-10-25 18:40 ` Jonas Fonseca 2005-10-25 18:52 ` Linus Torvalds 2005-10-25 18:50 ` Linus Torvalds
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).