git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
@ 2006-02-08 14:52 Ryan Anderson
  2006-02-08 15:09 ` Peter Eriksen
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Ryan Anderson @ 2006-02-08 14:52 UTC (permalink / raw)
  To: Junio C Hamano, git; +Cc: Ryan Anderson

Signed-off-by: Ryan Anderson <ryan@michonline.com>

---

I think this version is mostly ready to go.

Junio, the post you pointed me at was very helpful (once I got around to
listening to it), but the code it links to is missing - if that's a
 better partial implementation than this, can you resurrect it
somewhere?  I'd be happy to reintegrate it together.

 Makefile          |    1 
 git-annotate.perl |  291 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 292 insertions(+), 0 deletions(-)
 create mode 100755 git-annotate.perl

86fa163e7fd1bee2929b7946456407dbc7745193
diff --git a/Makefile b/Makefile
index 5c32934..8d24660 100644
--- a/Makefile
+++ b/Makefile
@@ -117,6 +117,7 @@ SCRIPT_SH = \
 SCRIPT_PERL = \
 	git-archimport.perl git-cvsimport.perl git-relink.perl \
 	git-shortlog.perl git-fmt-merge-msg.perl git-rerere.perl \
+	git-annotate.perl \
 	git-svnimport.perl git-mv.perl git-cvsexportcommit.perl
 
 SCRIPT_PYTHON = \
diff --git a/git-annotate.perl b/git-annotate.perl
new file mode 100755
index 0000000..a3ea201
--- /dev/null
+++ b/git-annotate.perl
@@ -0,0 +1,291 @@
+#!/usr/bin/perl
+# Copyright 2006, Ryan Anderson <ryan@michonline.com>
+#
+# GPL v2 (See COPYING)
+#
+# This file is licensed under the GPL v2, or a later version
+# at the discretion of Linus Torvalds.
+
+use warnings;
+use strict;
+
+use Data::Dumper;
+
+my $filename = shift @ARGV;
+
+
+my @stack = (
+	{
+		'rev' => "HEAD",
+		'filename' => $filename,
+	},
+);
+
+our (@lineoffsets, @pendinglineoffsets);
+our @filelines = ();
+open(F,"<",$filename)
+	or die "Failed to open filename: $!";
+
+while(<F>) {
+	chomp;
+	push @filelines, $_;
+}
+close(F);
+our $leftover_lines = @filelines;
+our %revs;
+our @revqueue;
+our $head;
+
+my $revsprocessed = 0;
+while (my $bound = pop @stack) {
+	my @revisions = git_rev_list($bound->{'rev'}, $bound->{'filename'});
+	foreach my $revinst (@revisions) {
+		my ($rev, @parents) = @$revinst;
+		$head ||= $rev;
+
+		if (scalar @parents > 0) {
+			$revs{$rev}{'parents'} = \@parents;
+			$revs{$rev}{'filename'} = $bound->{'filename'};
+			next;
+		}
+
+		my $newbound = find_parent_renames($rev, $bound->{'filename'});
+		if ( exists $newbound->{'filename'} && $newbound->{'filename'} ne $bound->{'filename'}) {
+			push @stack, $newbound;
+			$revs{$rev}{'parents'} = [$newbound->{'rev'}];
+		}
+	}
+}
+push @revqueue, $head;
+init_claim($head);
+$revs{$head}{'lineoffsets'} = {};
+handle_rev();
+
+
+my $i = 0;
+foreach my $l (@filelines) {
+	my ($output, $rev, $committer, $date);
+	if (ref $l eq 'ARRAY') {
+		($output, $rev, $committer, $date) = @$l;
+		if (length($rev) > 8) {
+			$rev = substr($rev,0,8);
+		}
+	} else {
+		$output = $l;
+		($rev, $committer, $date) = ('unknown', 'unknown', 'unknown');
+	}
+
+	printf("(%8s %10s %10s %d)%s\n", $rev, $committer, $date, $i++, $output);
+}
+
+sub init_claim {
+	my ($rev) = @_;
+	for (my $i = 0; $i < @filelines; $i++) {
+		$filelines[$i] = [ $filelines[$i], $rev, 'unknown', 'unknown', 0];
+			# line,
+			# rev,
+			# author,
+			# date,
+			# confirmed to actually belong to this rev (0 = tentative)
+	}
+}
+
+
+sub handle_rev {
+	my $i = 0;
+	while (my $rev = shift @revqueue) {
+
+		my %revinfo = git_commit_info($rev);
+
+		foreach my $p (@{$revs{$rev}{'parents'}}) {
+
+			my $nlineoffsets = {%{$revs{$rev}{'lineoffsets'}}};
+			git_line_assign($p, $rev, $revs{$p}{'filename'}, $nlineoffsets,
+				%revinfo);
+			push @revqueue, $p;
+			$revs{$p}{'lineoffsets'} = $nlineoffsets;
+		}
+
+		for (my $i = 0; $i < @filelines; $i++) {
+			if ($filelines[$i][1] eq $rev) {
+				claim_line($i, $rev, %revinfo);
+			}
+		}
+
+		if (scalar @{$revs{$rev}{parents}} == 0) {
+			# We must be at the initial rev here, so claim everything that is left.
+			for (my $i = 0; $i < @filelines; $i++) {
+				if (ref $filelines[$i] eq '') {
+					claim_line($i, $rev, %revinfo);
+				}
+			}
+		}
+	
+		return 1 if all_lines_claimed();
+	}	
+}
+
+
+sub git_rev_list {
+	my ($rev, $file) = @_;
+	#printf("grl = %s, %s\n", $rev, $file);
+
+# 	printf("Calling: %s\n",join(" ","git-rev-list","--parents","--remove-empty",$rev,"--",$file));
+	open(P,"-|","git-rev-list","--parents","--remove-empty",$rev,"--",$file)
+		or die "Failed to exec git-rev-list: $!";
+
+	my @revs;
+	while(my $line = <P>) {
+# 		print $line;
+		chomp $line;
+		my ($rev, @parents) = split /\s+/, $line;
+		push @revs, [ $rev, @parents ];
+	}
+	close(P);
+
+	printf("0 revs found for rev %s (%s)\n", $rev, $file) if (@revs == 0);
+	return @revs;
+}
+
+sub find_parent_renames {
+	my ($rev, $file) = @_;
+
+	open(P,"-|","git-diff", "-r","--name-status", "-z","$rev^1..$rev")
+		or die "Failed to exec git-diff: $!";
+
+	local $/ = "\0";
+	my %bound;
+	while (my $change = <P>) {
+		chomp $change;
+		my $filename = <P>;
+		chomp $filename;
+
+		if ($change =~ m/^[AMD]$/ ) {
+			next;
+		} elsif ($change =~ m/^R/ ) {
+			my $oldfilename = $filename;
+			$filename = <P>;
+			chomp $filename;
+			if ( $file eq $filename ) {
+				my $parent = git_find_parent($rev);
+				#printf("Found rename at boundary: %s-%s, %s\n", $rev, $parent, $oldfilename);
+				@bound{'rev','filename'} = ($parent, $oldfilename);
+
+				last;
+			} else {
+				#printf("Found unknown rename of %s => %s\n", $oldfilename, $filename);
+			}
+		} else {
+			#printf("Unknown name-status type of '%s'\n", $change);
+		}
+	}
+	close(P);
+
+	return \%bound;
+}
+
+
+sub git_find_parent {
+	my ($rev) = @_;
+
+	open(REVPARENT,"-|","git-rev-list","--parents","$rev^1..$rev")
+		or die "Failed to open git-rev-list to find a single parent: $!";
+
+	my $parentline = <REVPARENT>;
+	chomp $parentline;
+	my ($revfound,$parent) = split m/\s+/, $parentline;
+
+	close(REVPARENT);
+
+	return $parent;
+}
+
+
+# Examine a revision to see if it has unclaimed lines that we have,
+# if so, give those lines to that revision.
+sub git_line_assign {
+	my ($parent, $rev, $filename, $lineoffsets, %revinfo) = @_;
+
+	my @plines = git_cat_file($parent, $filename);
+
+	my ($i, $j, $jbase) = (0,0,0);
+	while ($i < @filelines && $filelines[$i][1] ne $rev) {
+		$i++;
+	}
+
+	if ($i == @filelines) {
+		printf("Skipping diff-parse - i = filelines)\n");
+	}
+	return if $i == @filelines;
+
+	while($i < @filelines && $j < @plines) {
+		if ($filelines[$i][0] eq $plines[$j]) {
+			# Our parent has this line, give it away.
+			$filelines[$i][1] = $parent;
+			$jbase = $j;
+			$i++;
+			$j++;
+			
+		} elsif ($j+1 == @plines) {
+			$i++;
+			$j = $jbase;
+		} else {
+			$j++;
+		}
+	}
+}
+
+sub git_cat_file {
+	my ($parent, $filename) = @_;
+	return () unless defined $parent && defined $filename;
+	my $blobline = `git-ls-tree $parent $filename`;
+	my ($mode, $type, $blob, $tfilename) = split(/\s+/, $blobline, 4);
+
+	open(C,"-|","git-cat-file", "blob", $blob)
+		or die "Failed to git-cat-file blob $blob (rev $parent, file $filename): " . $!;
+
+	my @lines;
+	while(<C>) {
+		chomp;
+		push @lines, $_;
+	}
+	close(C);
+
+	return @lines;
+}
+
+
+sub claim_line {
+	my ($floffset, $rev, %revinfo) = @_;
+	my $oline = $filelines[$floffset][0];
+	$filelines[$floffset] =	[ $oline, $rev,
+		$revinfo{'author'}, $revinfo{'author_date'} ];
+	$leftover_lines--;
+	printf("Claiming line %d with rev %s: '%s'\n",
+			$floffset, $rev, $oline) if 0;
+}
+
+sub git_commit_info {
+	my ($rev) = @_;
+	open(COMMIT, "-|","git-cat-file", "commit", $rev)
+		or die "Failed to call git-cat-file: $!";
+
+	my %info;
+	while(<COMMIT>) {
+		chomp;
+		last if (length $_ == 0);
+
+		if (m/^author (.*) <(.*)> (.*)$/) {
+			$info{'author'} = $1;
+			$info{'author_email'} = $2;
+			$info{'author_date'} = $3;
+		} elsif (m/^committer (.*) <(.*)> (.*)$/) {
+			$info{'committer'} = $1;
+			$info{'committer_email'} = $2;
+			$info{'committer_date'} = $3;
+		}
+	}
+	close(COMMIT);
+
+	return %info;
+}
-- 
1.1.6.g3b91b

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 14:52 [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file Ryan Anderson
@ 2006-02-08 15:09 ` Peter Eriksen
  2006-02-08 16:05   ` Johannes Schindelin
  2006-02-08 16:35 ` Franck Bui-Huu
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 19+ messages in thread
From: Peter Eriksen @ 2006-02-08 15:09 UTC (permalink / raw)
  To: git

On Wed, Feb 08, 2006 at 09:52:55AM -0500, Ryan Anderson wrote:
> Signed-off-by: Ryan Anderson <ryan@michonline.com>
> 
> ---
> 
> I think this version is mostly ready to go.
> 
> Junio, the post you pointed me at was very helpful (once I got around to
> listening to it), but the code it links to is missing - if that's a
> better partial implementation than this, can you ressurrect it
> somewhere?  I'd be happy to reintegrate it together.

Does it depend on some earlier patch?  I get this:

git]$ git-annotate diff-delta.c
Undefined subroutine &main::all_lines_claimed called at
/home/peter/bin/git-annotate line 124.

The patch was applied to: git version 1.1.6.gd19e-dirty.

Peter

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 15:09 ` Peter Eriksen
@ 2006-02-08 16:05   ` Johannes Schindelin
  0 siblings, 0 replies; 19+ messages in thread
From: Johannes Schindelin @ 2006-02-08 16:05 UTC (permalink / raw)
  To: Peter Eriksen; +Cc: git

Hi,

On Wed, 8 Feb 2006, Peter Eriksen wrote:

> On Wed, Feb 08, 2006 at 09:52:55AM -0500, Ryan Anderson wrote:
> > Signed-off-by: Ryan Anderson <ryan@michonline.com>
> > 
> > ---
> > 
> > I think this version is mostly ready to go.
> > 
> > Junio, the post you pointed me at was very helpful (once I got around to
> > listening to it), but the code it links to is missing - if that's a
> > better partial implementation than this, can you ressurrect it
> > somewhere?  I'd be happy to reintegrate it together.
> 
> Does it depends on some ealier patch?  I get this:
> 
> git]$ git-annotate diff-delta.c
> Undefined subroutine &main::all_lines_claimed called at
> /home/peter/bin/git-annotate line 124.

Just add a function like

-- snip --
sub all_lines_claimed {
        return ($leftover_lines == 0);
}
-- snap --

and you're done.

However, it does not yet do the correct thing: it does not show the root 
commit. For example, if you do "git annotate git-am.sh" it should show 
"d1c5f2a4" for the first lines, not "a1451104" as it does.

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 14:52 [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file Ryan Anderson
  2006-02-08 15:09 ` Peter Eriksen
@ 2006-02-08 16:35 ` Franck Bui-Huu
  2006-02-08 17:45   ` Johannes Schindelin
  2006-02-08 18:47   ` Randal L. Schwartz
  2006-02-08 19:09 ` Linus Torvalds
  2006-02-08 19:51 ` Junio C Hamano
  3 siblings, 2 replies; 19+ messages in thread
From: Franck Bui-Huu @ 2006-02-08 16:35 UTC (permalink / raw)
  To: Ryan Anderson; +Cc: Junio C Hamano, git

2006/2/8, Ryan Anderson <ryan@michonline.com>:
> Signed-off-by: Ryan Anderson <ryan@michonline.com>
>
> ---
>
> I think this version is mostly ready to go.
>

another perl script :(

Are there any rules on the choice of the script language ?

> Junio, the post you pointed me at was very helpful (once I got around to
> listening to it), but the code it links to is missing - if that's a
> better partial implementation than this, can you ressurrect it
> somewhere?  I'd be happy to reintegrate it together.
>

Thanks
--
               Franck

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 16:35 ` Franck Bui-Huu
@ 2006-02-08 17:45   ` Johannes Schindelin
  2006-02-08 19:19     ` Junio C Hamano
  2006-02-08 18:47   ` Randal L. Schwartz
  1 sibling, 1 reply; 19+ messages in thread
From: Johannes Schindelin @ 2006-02-08 17:45 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Junio C Hamano, git

Hi,

On Wed, 8 Feb 2006, Franck Bui-Huu wrote:

> another perl script :(
> 
> Are there any rules on the choice of the script language ?

Yes. Do not try to introduce unnecessary dependencies. But if it is 
the right tool to do the job, you should use it. As of now, we have perl, 
python and Tcl/Tk.

Hth,
Dscho

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with  the revision and person that created each line in the file.
  2006-02-08 16:35 ` Franck Bui-Huu
  2006-02-08 17:45   ` Johannes Schindelin
@ 2006-02-08 18:47   ` Randal L. Schwartz
  2006-02-09  9:19     ` Andreas Ericsson
  1 sibling, 1 reply; 19+ messages in thread
From: Randal L. Schwartz @ 2006-02-08 18:47 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Ryan Anderson, Junio C Hamano, git

>>>>> "Franck" == Franck Bui-Huu <vagabon.xyz@gmail.com> writes:

Franck> another perl script :(

Franck> Are there any rules on the choice of the script language ?

I could argue that they should all be Perl. :)

-- 
Randal L. Schwartz - Stonehenge Consulting Services, Inc. - +1 503 777 0095
<merlyn@stonehenge.com> <URL:http://www.stonehenge.com/merlyn/>
Perl/Unix/security consulting, Technical writing, Comedy, etc. etc.
See PerlTraining.Stonehenge.com for onsite and open-enrollment Perl training!

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 14:52 [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file Ryan Anderson
  2006-02-08 15:09 ` Peter Eriksen
  2006-02-08 16:35 ` Franck Bui-Huu
@ 2006-02-08 19:09 ` Linus Torvalds
  2006-02-08 19:51 ` Junio C Hamano
  3 siblings, 0 replies; 19+ messages in thread
From: Linus Torvalds @ 2006-02-08 19:09 UTC (permalink / raw)
  To: Ryan Anderson; +Cc: Junio C Hamano, git



On Wed, 8 Feb 2006, Ryan Anderson wrote:
> 
> I think this version is mostly ready to go.

Hmm.. I get

   [torvalds@g5 git]$ ./git-annotate Makefile
   fatal: 'e83c5163316f89bfbde7d9ab23ca2e25604af290^1..e83c5163316f89bfbde7d9ab23ca2e25604af290': No such file or directory
   Undefined subroutine &main::all_lines_claimed called at ./git-annotate line 124.

where that fatal error is because e83c51.. doesn't _have_ a parent, it's 
the root (so doing ^1 on it doesn't work).

After fixing the "all_lines_claimed" problem as outlined by Dscho, I get a 
lot of

	Skipping diff-parse - i = filelines)

and no actual output.

Doing it on a file that didn't exist in the root commit still produces those 
"Skipping" messages, but at least it did actually output something. 

However, what it output was clearly not correct, so there's still some 
tweaking to do.

For example, doing

	./git-annotate apply.c

annotates most of that file to Junio's commit 1c15afb9, which is totally 
incorrect, that commit actually only changed a few lines.

So it looks like there's still some work to be done on this..

			Linus

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 17:45   ` Johannes Schindelin
@ 2006-02-08 19:19     ` Junio C Hamano
  2006-02-10 11:25       ` Ralf Baechle
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-02-08 19:19 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Franck Bui-Huu, git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

>> Are there any rules on the choice of the script language ?
>
> Yes. Do not try to introduce unnecessary dependencies. But if it is 
> the right tool to do the job, you should use it. As of now, we have perl, 
> python and Tcl/Tk.

Very well said.  That's what currently stands.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 14:52 [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file Ryan Anderson
                   ` (2 preceding siblings ...)
  2006-02-08 19:09 ` Linus Torvalds
@ 2006-02-08 19:51 ` Junio C Hamano
  2006-02-08 21:07   ` Ryan Anderson
  3 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-02-08 19:51 UTC (permalink / raw)
  To: Ryan Anderson; +Cc: git

Ryan Anderson <ryan@michonline.com> writes:

> Signed-off-by: Ryan Anderson <ryan@michonline.com>
>
> ---
>
> I think this version is mostly ready to go.
>
> Junio, the post you pointed me at was very helpful (once I got around to
> listening to it), but the code it links to is missing - if that's a
> better partial implementation than this, can you ressurrect it
> somewhere?  I'd be happy to reintegrate it together.

I still have it, but the reason why I withdrew circulating it
was because I found that on some inputs it did not work
correctly as intended.  Not that the algorithm was necessarily
broken but the implementation certainly was.

Unlike yours mine reads and interprets diff output to find which
lines are common and which lines are added, and I think the diff
interpretation logic has various corner cases wrong.  I did
combine-diff.c diff interpreter without looking at my
'git-blame', so I do not remember where I got it wrong,
though...

It's been a while since I looked at it the last time so it may
not even work with the current git, but here it is..

--
#!/usr/bin/perl -w

use strict;

package main;
$::debug = 0;

# Return the complete contents of blob object $blob_id as one string, read
# from the output of "git-cat-file blob".
#
# Dies when the command cannot be started or when closing the pipe reports a
# failure.
sub read_blob {
    my ($blob_id) = @_;

    # Slurp mode: one read yields everything git-cat-file prints.
    local $/ = undef;

    open my $pipe, '-|', 'git-cat-file', 'blob', $blob_id
        or die "cannot read blob $blob_id";
    # join() turns the empty list produced by an empty blob into ''.
    my $content = join('', <$pipe>);
    close $pipe
        or die "failure while closing pipe to git-cat-file";

    return $content;
}

# Compare the index against commit/tree $parent and return the raw diff
# entries whose destination path is $filename.
#
# Each element of the returned list is an array ref
#   [ status, old sha1, new sha1, source path, destination path ].
# An empty list is returned when a path-limited git-diff-index reports no
# change for $filename at all.  Dies when git-diff-index cannot be started or
# when closing either pipe reports a failure.
sub read_diff_raw {
    my ($parent, $filename) = @_;

    # git-diff-index -z terminates every field group with NUL.
    local $/ = "\0";

    # Pass 1: cheap, path-limited check for any change.
    print STDERR "* diff-index --cached $parent $filename\n" if $::debug;
    open my $probe, '-|', 'git-diff-index', '--cached', '-z', $parent, $filename
        or die "cannot read git-diff-index $parent $filename";
    my $saw_change = 0;
    # The pipe is read to the end rather than abandoned after the first record.
    while (defined(my $record = <$probe>)) {
        $saw_change = 1;
    }
    close $probe
        or die "failure while closing pipe to git-diff-index";
    return () unless $saw_change;

    # Pass 2: whole-tree diff with break/copy detection, filtered by
    # destination path below.
    print STDERR "* diff-index -B -C --find-copies-harder --cached $parent\n" if $::debug;
    open my $diff, '-|', 'git-diff-index', '-B', '-C', '--find-copies-harder',
        '--cached', '-z', $parent
        or die "cannot read git-diff-index with $parent";

    my @matches;
    while (defined(my $meta = <$diff>)) {
        chomp $meta;
        my (undef, undef, $old_sha1, $new_sha1, $status) = split(/ /, $meta);

        my $old_path = <$diff>;
        chomp $old_path;

        my $new_path;
        if ($status =~ /^[CR]/) {
            # Copies and renames carry a second path record.
            $new_path = <$diff>;
            chomp $new_path;
        }
        else {
            next if $status =~ /^D/;
            $new_path = $old_path;
        }

        push @matches, [$status, $old_sha1, $new_sha1, $old_path, $new_path]
            if $new_path eq $filename;
    }
    close $diff
        or die "failure while closing pipe to git-diff-index";

    return @matches;
}

# Write the contents of blob object $sha1 to the file named $temp, creating
# or truncating it.
#
# Returns a true value on success.  Dies (with the system error text) when
# the file cannot be opened, written, or closed; buffered write errors such
# as a full disk only surface at close, so both steps are checked.
sub write_temp_blob {
    my ($sha1, $temp) = @_;
    my $blob = read_blob($sha1);

    open my $fh, '>', $temp
        or die "cannot open temporary file $temp: $!";
    print {$fh} $blob
        or die "cannot write temporary file $temp: $!";
    close $fh
        or die "cannot close temporary file $temp: $!";

    return 1;
}

package Git::Patch;
# Build a Git::Patch describing how blob $sha1_1 (parent side) was changed
# into blob $sha1_2 (commit side).
#
# The object is a blessed array of hunks, one per "@@" header printed by
# "diff -u0", each stored as [ start_1, len_1, start_2, len_2 ]; a length
# omitted in the header is recorded as 1.
#
# The two blobs are written into a freshly created private directory instead
# of fixed names directly under /tmp, so another local user cannot pre-create
# or symlink the paths, and so the files are removed at program exit even
# when one of the helpers dies halfway through.
sub new {
    my ($class, $sha1_1, $sha1_2) = @_;
    my $self = bless [], $class;

    require File::Temp;
    my $workdir = File::Temp::tempdir("blame-$$-XXXXXX", TMPDIR => 1, CLEANUP => 1);
    my $old_file = "$workdir/1";
    my $new_file = "$workdir/2";

    ::write_temp_blob($sha1_1, $old_file);
    ::write_temp_blob($sha1_2, $new_file);

    open my $fh, '-|', 'diff', '-u0', $old_file, $new_file
        or die "cannot read diff";
    while (my $line = <$fh>) {
        if ($line =~ /^\@\@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? \@\@/) {
            push @$self, [$1, (defined $2 ? $2 : 1),
                          $3, (defined $4 ? $4 : 1)];
        }
    }
    # diff exits with status 1 when the files differ, which makes close()
    # return false on perfectly good output; the result is ignored on purpose.
    close $fh;

    # Clean up eagerly; CLEANUP => 1 is only the safety net for error paths.
    unlink $old_file, $new_file;
    rmdir $workdir;

    return $self;
}

# Map a 1-based line number of the commit-side file to the corresponding
# line number in the parent-side file.
#
# $self is the array of hunks [ start_1, len_1, start_2, len_2 ] taken from
# "diff -u0" headers, in the ascending order diff prints them.
#
# Returns the parent line number when the line was inherited unchanged, or
# -1 when the line lies inside a hunk, i.e. was added or changed by the
# commit.
sub find_parent_line {
    my ($self, $commit_lineno) = @_;

    # Running (parent lines - commit lines) over the hunks already passed;
    # adding it to a commit line number yields the parent line number.
    my $ofs = 0;

    for my $hunk (@$self) {
        my (undef, $len_1, $line_2, $len_2) = @$hunk;

        # For a pure deletion the commit side of the hunk is empty and diff
        # reports the line *preceding* the deleted text, so that line itself
        # is still in front of the hunk.
        my $start_2 = $len_2 ? $line_2 : $line_2 + 1;

        if ($commit_lineno < $start_2) {
            # In front of this hunk: only the earlier hunks shift the line.
            return $commit_lineno + $ofs;
        }
        if ($commit_lineno < $start_2 + $len_2) {
            return -1; # changed by commit.
        }
        $ofs += ($len_1 - $len_2);
    }
    return $commit_lineno + $ofs;
}

package Git::Commit;

# Known alternate "Name <email>" identities, mapped to the single spelling
# under which each person should be reported.
my %author_name_canon = (
    'Linus Torvalds <torvalds@evo.osdl.org>'
        => 'Linus Torvalds <torvalds@osdl.org>',
    'Linus Torvalds <torvalds@ppc970.osdl.org.(none)>'
        => 'Linus Torvalds <torvalds@osdl.org>',
    'Linus Torvalds <torvalds@ppc970.osdl.org>'
        => 'Linus Torvalds <torvalds@osdl.org>',
    'Linus Torvalds <torvalds@g5.osdl.org>'
        => 'Linus Torvalds <torvalds@osdl.org>',
    'Matthias Urlichs <smurf@kiste.(none)>'
        => 'Matthias Urlichs <smurf@smurf.noris.de>',
    'Paul Mackerras <paulus@dorrigo.(none)>'
        => 'Paul Mackerras <paulus@samba.org>',
    'Paul Mackerras <paulus@pogo.(none)>'
        => 'Paul Mackerras <paulus@samba.org>',
    'Petr Baudis <pasky@ucw.cz>'
        => 'Petr Baudis <pasky@suse.cz>',
    'tony.luck@intel.com <tony.luck@intel.com>'
        => 'Tony Luck <tony.luck@intel.com>',
    'barkalow@iabervon.org <barkalow@iabervon.org>'
        => 'Daniel Barkalow <barkalow@iabervon.org>',
    'jon@blackcubes.dyndns.org <jon@blackcubes.dyndns.org>'
        => 'Jon Seymour <jon.seymour@gmail.com>',
    'Sven Verdoolaege <skimo@kotnet.org>'
        => 'Sven Verdoolaege <skimo@liacs.nl>',
    'Bryan Larsen <bryanlarsen@yahoo.com>'
        => 'Bryan Larsen <bryan.larsen@gmail.com>',
    'Junio C Hamano <junio@twinsun.com>'
        => 'Junio C Hamano <junkio@cox.net>',
);

# Return the canonical spelling of the identity $name, or $name itself when
# it is not listed in %author_name_canon.
sub canon_author_name {
    my ($name) = @_;
    return exists $author_name_canon{$name}
        ? $author_name_canon{$name}
        : $name;
}

# Construct a Git::Commit for $commit_sha1 by parsing the output of
# "git-cat-file commit".
#
# The object is a blessed hash with:
#   SHA1      - the name passed in
#   TREE      - tree id from the "tree" header (undef if absent)
#   PARENT    - array ref with one id per "parent" header
#   AUTHOR    - "Name <email>" from the "author" header, canonicalised
#   COMMITTER - "Name <email>" from the "committer" header, canonicalised
#
# Only the header (everything up to the first empty line) is interpreted, so
# a log message that happens to contain a line such as "parent <40 hex>" or
# "author X <y>" cannot add bogus parents or overwrite the identities.
#
# Dies when git-cat-file cannot be started or closing the pipe fails.
sub new {
    my ($class, $commit_sha1) = @_;
    my $self = bless {
        PARENT    => [],
        TREE      => undef,
        AUTHOR    => undef,
        COMMITTER => undef,
        SHA1      => $commit_sha1,
    }, $class;

    open my $fh, '-|', 'git-cat-file', 'commit', $commit_sha1
        or die "cannot read commit object $commit_sha1";

    my $in_header = 1;
    while (my $line = <$fh>) {
        # Past the header we still read to end of file, so that the pipe is
        # not closed under a git-cat-file that is still writing.
        next unless $in_header;

        chomp $line;
        if ($line eq '') {
            $in_header = 0;
        }
        elsif ($line =~ /^tree ([0-9a-f]{40})$/) {
            $self->{TREE} = $1;
        }
        elsif ($line =~ /^parent ([0-9a-f]{40})$/) {
            push @{ $self->{PARENT} }, $1;
        }
        elsif ($line =~ /^author ([^>]+>)/) {
            $self->{AUTHOR} = canon_author_name($1);
        }
        elsif ($line =~ /^committer ([^>]+>)/) {
            $self->{COMMITTER} = canon_author_name($1);
        }
    }
    close $fh
        or die "failure while closing pipe to git-cat-file";

    return $self;
}

# Look up $path in the tree of $commit (a hash with a TREE entry) using
# "git-ls-tree -z -r -d" and return the id of the first blob entry listed,
# or undef when no blob entry is printed.
#
# Dies when git-ls-tree cannot be started, when closing the pipe fails, or
# when a blob entry is reported under a name other than $path.
sub find_file {
    my ($commit, $path) = @_;

    # -z output: one NUL-terminated record per tree entry.
    local $/ = "\0";

    open my $listing, '-|', 'git-ls-tree', '-z', '-r', '-d', $commit->{TREE}, $path
        or die "cannot read git-ls-tree $commit->{TREE}";

    my $blob_id = undef;
    ENTRY:
    while (defined(my $entry = <$listing>)) {
        chomp $entry;
        next ENTRY unless $entry =~ /^[0-7]{6} blob ([0-9a-f]{40})	(.*)$/;

        my ($found_id, $found_path) = ($1, $2);
        die "$found_path ne $path???" if $found_path ne $path;

        $blob_id = $found_id;
        last ENTRY;
    }
    close $listing
        or die "failure while closing pipe to git-ls-tree";

    return $blob_id;
}

package Git::Blame;
# Create a Git::Blame for $filename as it exists in $commit (a Git::Commit).
#
# The object is a blessed hash with:
#   LINE    - one record per line of the file, each initially suspecting
#             $commit: { COMMIT, FOUND => undef, FILENAME, LINENO (1-based) }
#   UNKNOWN - number of lines whose origin is not yet confirmed
#   WORK    - queue of [ commit, filename ] pairs still to examine, seeded
#             with [ $commit, $filename ]
#
# Dies when $filename has no blob in $commit, or when the blob cannot be
# read.
sub new {
    my ($class, $commit, $filename) = @_;
    my $self = bless {
        LINE    => [],
        UNKNOWN => undef,
        WORK    => [],
    }, $class;

    my $sha1 = $commit->find_file($filename);
    defined $sha1
        or die "cannot find $filename in commit $commit->{SHA1}";
    my $blob = ::read_blob($sha1);

    # A negative limit stops split from discarding trailing empty fields,
    # which would silently drop blank lines at the end of the file.  The one
    # empty field that follows the final newline is a terminator, not a line.
    my @blob = split(/\n/, $blob, -1);
    pop @blob if @blob && $blob[-1] eq '';

    for (my $i = 0; $i < @blob; $i++) {
        $self->{LINE}[$i] = +{
            COMMIT   => $commit,
            FOUND    => undef,
            FILENAME => $filename,
            LINENO   => ($i + 1),
        };
    }
    $self->{UNKNOWN} = scalar @blob;
    push @{ $self->{WORK} }, [$commit, $filename];
    return $self;
}

# Load a blame cache from $filename into $self->{PATHINFO}.
#
# Every line is expected to hold three tab-separated fields,
# "commit<TAB>parent<TAB>path", and is stored as
# $self->{PATHINFO}{path}{commit}{parent} = 1.  Lines with fewer than three
# fields are skipped instead of being stored under undefined keys.
#
# PATHINFO is always reset to an empty hash first.  When the file cannot be
# opened a warning is printed and PATHINFO stays empty, which callers treat
# the same as having no cached information for any path.
#
# Returns true when the file was read, false (empty list) otherwise.
sub read_blame_cache {
    my ($self, $filename) = @_;
    my $pi = $self->{'PATHINFO'} = {};

    open my $fh, '<', $filename or do {
        warn "cannot read blame cache $filename: $!\n";
        return;
    };
    while (my $line = <$fh>) {
        chomp $line;
        my ($commit, $parent, $path) = split(/\t/, $line);
        next unless defined $path;
        $pi->{$path}{$commit}{$parent} = 1;
    }
    close $fh;

    return 1;
}

# Write the blame table to the currently selected output handle, one record
# per line of the file.
#
# A record starts with ':' when the line's origin is confirmed (FOUND) and
# '?' otherwise, followed by the tab-separated commit id, author, committer
# and line number, then the file name, and ends with $line_termination.
sub print {
    my ($self, $line_termination) = @_;

    for my $entry (@{ $self->{LINE} }) {
        my $suspect = $entry->{COMMIT};
        my $record = ($entry->{FOUND} ? ':' : '?')
            . "$suspect->{SHA1}	"
            . "$suspect->{AUTHOR}	"
            . "$suspect->{COMMITTER}	"
            . "$entry->{LINENO}	$entry->{FILENAME}"
            . $line_termination;
        print $record;
    }
}

# Confirm $commit as the origin of every line that still suspects it:
# each not-yet-confirmed line whose COMMIT has the same SHA1 as $commit is
# marked FOUND, and the UNKNOWN counter is decremented once per such line.
sub take_responsibility {
    my ($self, $commit) = @_;

    for my $entry (@{ $self->{LINE} }) {
        next if $entry->{FOUND};
        next unless $entry->{COMMIT}->{SHA1} eq $commit->{SHA1};

        $entry->{FOUND} = 1;
        $self->{UNKNOWN}--;
    }
}

# Examine one parent of $commit and hand over to it every line that is
# still suspected on $commit but already exists in that parent.
#
#   $self     - the Git::Blame object (LINE, UNKNOWN and WORK are used)
#   $commit   - Git::Commit currently being processed
#   $parent   - Git::Commit for the parent to compare against
#   $filename - path of the file in $commit
#
# Lines passed on get their COMMIT (and, when a patch was consulted, their
# FILENAME and LINENO) rewritten to refer to the parent.  If anything was
# passed on and unconfirmed lines remain, [$parent, path-in-parent] is put
# at the front of the WORK queue.  Lines that differ from the parent are
# left untouched; confirming them is not done here.
#
# Dies with "Oops" when read_diff_raw returns more than one entry.
sub blame_parent {
    my ($self, $commit, $parent, $filename) = @_;
    # Entries of the diff against the parent whose destination is $filename.
    my @diff = ::read_diff_raw($parent->{SHA1}, $filename);
    my $filename_in_parent;
    my $passed_blame_to_parent = undef;
    if (@diff == 0) {
	# We have not touched anything.  Blame parent for everything
	# that we are suspected for.
	for (my $i = 0; $i < @{$self->{LINE}}; $i++) {
	    my $l = $self->{LINE}[$i];
	    if (! $l->{FOUND} && ($l->{COMMIT}->{SHA1} eq $commit->{SHA1})) {
		$l->{COMMIT} = $parent;
		$passed_blame_to_parent = 1;
	    }
	}
	$filename_in_parent = $filename;
    }
    elsif (@diff != 1) {
	# This should not happen.
	for (@diff) {
	    print "** @$_\n";
	}
	die "Oops";
    }
    else {
	my ($status, $sha1_1, $sha1_2, $file1, $file2) = @{$diff[0]};
	print STDERR "** $status $file1 $file2\n" if $::debug;
	# NOTE(review): "M" followed by two digits is presumably a modification
	# reported with a score by -B (a rewrite) -- confirm against the
	# git-diff-index output format.
	if ($status =~ /A/ || $status =~ /M[0-9][0-9]/) {
	    # Either some of other parents created it, or we did.
	    # At this point the only thing we know is that this
	    # parent is not responsible for it.
	    ;
	}
	else {
	    # Line-level comparison of the parent's blob with ours.
	    my $patch = Git::Patch->new($sha1_1, $sha1_2);
	    $filename_in_parent = $file1;
	    for (my $i = 0; $i < @{$self->{LINE}}; $i++) {
		my $l = $self->{LINE}[$i];
		if (! $l->{FOUND} && $l->{COMMIT}->{SHA1} eq $commit->{SHA1}) {
		    # We are suspected to have introduced this line.
		    # Does it exist in the parent?
		    my $lineno = $l->{LINENO};
		    my $parent_line = $patch->find_parent_line($lineno);
		    if ($parent_line < 0) {
			# No, we may be the guilty ones, or some other
			# parent might be.  We do not assign blame to
			# ourselves here yet.
			;
		    }
		    else {
			# This line is coming from the parent, so pass
			# blame to it.
			$l->{COMMIT} = $parent;
			$l->{FILENAME} = $file1;
			$l->{LINENO} = $parent_line;
			$passed_blame_to_parent = 1;
		    }
		}
	    }
	}
    }
    # Queue the parent ahead of the other pending work, but only when it
    # actually received lines and something is still unconfirmed.
    if ($passed_blame_to_parent && $self->{UNKNOWN}) {
	unshift @{$self->{WORK}},
	[$parent, $filename_in_parent];
    }
}

# Process one queued ($commit, $filename) pair: pass blame for inherited
# lines to the parents and confirm what is left as $commit's own work.
#
# Shortcut: when PATHINFO (the blame cache) knows $filename but has no entry
# for this commit, and the commit has exactly one parent, every line still
# suspected on $commit is handed to that parent without running any diff,
# and the parent is queued at the front of WORK if unconfirmed lines remain.
#
# Otherwise the commit is loaded into the index with git-read-tree and each
# parent is compared against it through blame_parent (diff-index against the
# index is used instead of diff-tree because the latter does not look for
# copies hard enough); afterwards take_responsibility confirms the remaining
# suspected lines.
sub assign {
    my ($self, $commit, $filename) = @_;
    my $sha1 = $commit->{SHA1};

    my $untouched_with_single_parent =
           exists $self->{'PATHINFO'}
        && exists $self->{'PATHINFO'}{$filename}
        && !exists $self->{'PATHINFO'}{$filename}{$sha1}
        && @{ $commit->{PARENT} } == 1;

    unless ($untouched_with_single_parent) {
        print STDERR "* read-tree  $sha1\n" if $::debug;
        system('git-read-tree', '-m', $sha1);
        for my $parent_sha1 (@{ $commit->{PARENT} }) {
            $self->blame_parent($commit, Git::Commit->new($parent_sha1), $filename);
        }
        return $self->take_responsibility($commit);
    }

    # This commit did not touch the path at all, and has only one parent.
    # It is all that parent's fault.
    my $parent = Git::Commit->new($commit->{PARENT}[0]);
    my $handed_over = 0;
    for my $entry (@{ $self->{LINE} }) {
        next if $entry->{FOUND};
        next unless $entry->{COMMIT}->{SHA1} eq $sha1;
        $entry->{COMMIT} = $parent;
        $handed_over = 1;
    }
    unshift @{ $self->{WORK} }, [$parent, $filename]
        if $handed_over && $self->{UNKNOWN};
    return;
}

# Drive the blame computation: take [commit, filename] pairs off the front
# of the WORK queue and hand each to assign(), stopping as soon as no line
# is left unconfirmed (UNKNOWN is zero) or the queue is empty.
sub assign_blame {
    my ($self) = @_;
    until (!$self->{UNKNOWN} || !@{ $self->{WORK} }) {
        my ($commit, $filename) = @{ shift @{ $self->{WORK} } };
        $self->assign($commit, $filename);
    }
}



################################################################
package main;
# Command-line driver: blame [-z] <commit> filename
#
# Prints one record per line of <filename> as of <commit>; with -z the
# records are terminated by NUL instead of newline.
my $usage = "blame [-z] <commit> filename";
my $line_termination = "\n";

# All index operations go through a private, per-process index file so the
# index of the working tree is never touched.
$::ENV{GIT_INDEX_FILE} = "/tmp/blame-$$-index";
unlink($::ENV{GIT_INDEX_FILE});

# @ARGV is checked first so that running without any argument reaches the
# usage message below instead of comparing an undefined value.
if (@ARGV && $ARGV[0] eq '-z') {
    $line_termination = "\0";
    shift;
}

if (@ARGV != 2) {
    die $usage;
}

my $head_commit = Git::Commit->new($ARGV[0]);
my $filename = $ARGV[1];
my $blame = Git::Blame->new($head_commit, $filename);
# The cache is optional; it is only consulted when present.
if (-f ".blame-cache") {
    $blame->read_blame_cache(".blame-cache");
}

$blame->assign_blame();
$blame->print($line_termination);

unlink($::ENV{GIT_INDEX_FILE});

__END__

How does this work, and what do we do about merges?

The algorithm considers the first parent to be our main line of
development and treats it somewhat specially compared to other parents.  So we
pass on the blame to the first parent if a line has not changed from
it.  For lines that have changed from the first parent, we must have
either inherited that change from some other parent, or it could have
been merge conflict resolution edit we did on our own.

The following picture illustrates how we pass on and assign blames.

In the sample, the original O was forked into A and B and then merged
into M.  Line 1, 2, and 4 did not change.  Line 3 and 5 are changed in
A, and Line 5 and 6 are changed in B.  M made its own decision to
resolve merge conflicts at Line 5 to something different from A and B:

                A: 1 2 T 4 T 6
               /               \ 
O: 1 2 3 4 5 6                  M: 1 2 T 4 M S
               \               / 
                B: 1 2 3 4 S S

In the following picture, each line is annotated with a blame letter.
A lowercase blame (e.g. "a" for "1") means that commit or its ancestor
is the guilty party but we do not know which particular ancestor is
responsible for the change yet.  An uppercase blame means that we know
that commit is the guilty party.

First we look at M (the HEAD) and initialize Git::Blame->{LINE} like
this:

             M: 1 2 T 4 M S
                m m m m m m

That is, we know all lines are results of modification made by some
ancestor of M, so we assign lowercase 'm' to all of them.

Then we examine our first parent A.  Throughout the algorithm, we are
only ever interested in the lines for which we are the suspect, but this
being the initial round, we are the suspect for all of them.  We notice that
1 2 T 4 are the same as the parent A, so we pass the blame for these
four lines to A.  M and S are different from A, so we leave them as
they are (note that we do not immediately take the blame for them):

             M: 1 2 T 4 M S
                a a a a m m

Next we go on to examine parent B.  Again, we are only interested in
the lines we are still the suspect (i.e. M and S).  We notice S is
something we inherited from B, so we pass the blame on to it, like
this:

             M: 1 2 T 4 M S
                a a a a m b

Once we exhausted the parents, we look at the results and take
responsibility for the remaining ones that we are still the suspect:

             M: 1 2 T 4 M S
                a a a a M b

We are done with M.  And we know commits A and B need to be examined
further, so we do them recursively.  When we look at A, we again only
look at the lines for which A is the suspect:

             A: 1 2 T 4 T 6
                a a a a M b

Among 1 2 T 4, comparing against its parent O, we notice 1 2 4 are
the same so pass the blame for those lines to O:

             A: 1 2 T 4 T 6
                o o a o M b

A is a non-merge commit; we have already exhausted the parents and
take responsibility for the remaining ones that A is the suspect:

             A: 1 2 T 4 T 6
                o o A o M b

We go on like this and the final result would become:

             O: 1 2 3 4 5 6
                O O A O M B

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 19:51 ` Junio C Hamano
@ 2006-02-08 21:07   ` Ryan Anderson
  2006-02-08 21:45     ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Ryan Anderson @ 2006-02-08 21:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Wed, Feb 08, 2006 at 11:51:22AM -0800, Junio C Hamano wrote:
> Ryan Anderson <ryan@michonline.com> writes:
> 
> > Signed-off-by: Ryan Anderson <ryan@michonline.com>
> >
> > ---
> >
> > I think this version is mostly ready to go.
> >
> > Junio, the post you pointed me at was very helpful (once I got around to
> > listening to it), but the code it links to is missing - if that's a
> > better partial implementation than this, can you ressurrect it
> > somewhere?  I'd be happy to reintegrate it together.
> 
> I still have it, but the reason why I withdrew circulating it
> was because I found that on some inputs it did not work
> correctly as intended.  Not that the algorithm was necessarily
> broken but the implementation certainly was.
> 
> Unlike yours mine reads and interprets diff output to find which
> lines are common and which lines are added, and I think the diff
> interpretation logic has various corner cases wrong.  I did
> combine-diff.c diff interpreter without looking at my
> 'git-blame', so I do not remember where I got it wrong,
> though...

I tried that approach at first, and it was much much more confusing to
try to keep track of.  The problem Linus found (that of a missing
"all_lines_claimed()") was related to that code.  This implementation is
simple, though it has to have some problems with guessing at duplicated
lines incorrectly.

> It's been a while since I looked at it the last time so it may
> not even work with the current git, but here it is..

I'll take a look through this in greater detail later, hopefully your
approach can be applied.  Diff-analyzing is apparently tricky.

-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 21:07   ` Ryan Anderson
@ 2006-02-08 21:45     ` Junio C Hamano
  2006-02-10 22:04       ` Ryan Anderson
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-02-08 21:45 UTC (permalink / raw)
  To: Ryan Anderson; +Cc: git

Ryan Anderson <ryan@michonline.com> writes:

>> It's been a while since I looked at it the last time so it may
>> not even work with the current git, but here it is..
>
> I'll take a look through this in greater detail later, hopefully your
> approach can be applied.  Diff-analyzing is apparently tricky.

Reading a diff is tricky, but I was too lazy to match up the lines by
hand, which is also real work ;-).

There are a few things I should add to that ancient code:

 - It wants old ls-tree behaviour.  The command line used in the
   "sub find_file" needs to be updated to something like this:

    open $fh, '-|', 'git-ls-tree', '-z', '-r', $commit->{TREE}, $path
	or die "cannot read git-ls-tree $commit->{TREE}";

 - It only cares about the line numbers and its output is meant
   to be postprocessed with the contents from the latest blob.

 - It predates the recent rev-list that skips commits that do
   not change the specified paths, and it literally follows each
   parent and optimizes not to diff with uninteresting parents
   by hand.

I suspect if you go with the diff-reading approach, it might be
easy to convert it to C (or even write the initial version in C)
using the machinery similar to what is in combine-diff.c.

The algorithm combine-diff.c uses keeps the lines discarded from
each parent in lline structure linked to the sline structure
(which keeps track of the lines in the final version), but for
your annotate purposes what you care about is only what the
child adds to the parent (IOW, we do not care about the lines
that do not appear in the final version), so the logic and the
data structure could be greatly simplified.  You only need to
keep "flag" element in the sline structure, and maybe bol and
len that point at the contents of the resulting line from the
final version.  In addition, you would need to store "the
current suspect commit" (starts from the final revision and
updated as you pass the blame along) and another bool that says
if "the current suspect" is known to be the guilty party or if
the true culprit is one of its ancestors (capital vs lowercase
difference in that explanatory note).

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with  the revision and person that created each line in the file.
  2006-02-08 18:47   ` Randal L. Schwartz
@ 2006-02-09  9:19     ` Andreas Ericsson
  2006-02-09  9:57       ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Andreas Ericsson @ 2006-02-09  9:19 UTC (permalink / raw)
  To: git

Randal L. Schwartz wrote:
>>>>>>"Franck" == Franck Bui-Huu <vagabon.xyz@gmail.com> writes:
> 
> 
> Franck> another perl script :(
> 
> Franck> Are there any rules on the choice of the script language ?
> 
> I could argue that they should all be Perl. :)
> 

Brave thing to do among such a bunch of hardcore C hackers. ;)

So long as we never involve ruby, java or DCL, I'm a happy fellow.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with  the revision and person that created each line in the file.
  2006-02-09  9:19     ` Andreas Ericsson
@ 2006-02-09  9:57       ` Junio C Hamano
  2006-02-09 12:04         ` Franck Bui-Huu
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2006-02-09  9:57 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: git

Andreas Ericsson <ae@op5.se> writes:

> So long as we never involve ruby, java or DCL, I'm a happy fellow.

Wholeheartedly seconded ;-).

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-09  9:57       ` Junio C Hamano
@ 2006-02-09 12:04         ` Franck Bui-Huu
  2006-02-09 12:47           ` Andreas Ericsson
  0 siblings, 1 reply; 19+ messages in thread
From: Franck Bui-Huu @ 2006-02-09 12:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Andreas Ericsson, git

2006/2/9, Junio C Hamano <junkio@cox.net>:
> Andreas Ericsson <ae@op5.se> writes:
>
> > So long as we never involve ruby, java or DCL, I'm a happy fellow.
>
> Wholeheartedly seconded ;-).
>

I agree too, but my point was more: why not use only Python scripts?
Why are some scripts written in Perl when Python could be
used, and vice versa?

Thanks
--
               Franck

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-09 12:04         ` Franck Bui-Huu
@ 2006-02-09 12:47           ` Andreas Ericsson
  0 siblings, 0 replies; 19+ messages in thread
From: Andreas Ericsson @ 2006-02-09 12:47 UTC (permalink / raw)
  To: Franck Bui-Huu; +Cc: Junio C Hamano, git

Franck Bui-Huu wrote:
> 2006/2/9, Junio C Hamano <junkio@cox.net>:
> 
>>Andreas Ericsson <ae@op5.se> writes:
>>
>>
>>>So long as we never involve ruby, java or DCL, I'm a happy fellow.
>>
>>Wholeheartedly seconded ;-).
>>
> 
> 
> I agree to but my point was more why not only using python scripts ?
> Why sometimes some scripts is written in perl whereas python could be
> used and vice-versa ?
> 

Perl is better suited for some tasks, Python for others. Mostly it's 
because the contributor (one out of 137 to date) thought the language 
appropriate for the tool he/she set out to write and felt comfortable 
with it.

I personally abhor the syntax of Perl and the block indentation of 
Python but I happily embrace both if the alternative is to rewrite all 
the script tools in C.

That said, some tools have been rewritten in the past (mostly scripts 
have been replaced by C code versions), but I don't think Junio will 
accept replacement tools just because they're in one particular 
language. If anything, it would be to replace the two python scripts 
with Perl versions, since more tools are implemented in Perl than in 
Python (so we could drop one dependency), Perl exists on more platforms 
(so git becomes more portable), and Perl is used inline in four of the 
shell-scripts (which means we can't get rid of the Perl dependency 
without major hackery anyway).

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 19:19     ` Junio C Hamano
@ 2006-02-10 11:25       ` Ralf Baechle
  2006-02-10 11:55         ` Andreas Ericsson
  2006-02-14 10:51         ` Fredrik Kuivinen
  0 siblings, 2 replies; 19+ messages in thread
From: Ralf Baechle @ 2006-02-10 11:25 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Schindelin, Franck Bui-Huu, git

On Wed, Feb 08, 2006 at 11:19:37AM -0800, Junio C Hamano wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
> 
> >> Are there any rules on the choice of the script language ?
> >
> > Yes. Do not try to introduce unnecessary dependencies. But if it is 
> > the right tool to do the job, you should use it. As of now, we have perl, 
> > python and Tcl/Tk.
> 
> Very well said.  That's what currently stands.

The dependency on Python 2.4 already is a problem for installation on some
systems ...

  Ralf

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-10 11:25       ` Ralf Baechle
@ 2006-02-10 11:55         ` Andreas Ericsson
  2006-02-14 10:51         ` Fredrik Kuivinen
  1 sibling, 0 replies; 19+ messages in thread
From: Andreas Ericsson @ 2006-02-10 11:55 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Junio C Hamano, Johannes Schindelin, Franck Bui-Huu, git

Ralf Baechle wrote:
> On Wed, Feb 08, 2006 at 11:19:37AM -0800, Junio C Hamano wrote:
> 
> 
>>Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:
>>
>>
>>>>Are there any rules on the choice of the script language ?
>>>
>>>Yes. Do not try to introduce unnecessary dependencies. But if it is 
>>>the right tool to do the job, you should use it. As of now, we have perl, 
>>>python and Tcl/Tk.
>>
>>Very well said.  That's what currently stands.
> 
> 
> The dependency on Python 2.4 already is a problem for installation on some
> systems ...
> 

Not many though. Since Python is only required on the workstation where 
the developer does his/her work it's not a very cumbersome requirement. 
The same holds for Perl, btw. It's not a requirement on the server 
hosting the public repositories, unless some of the scripts are used 
from the hooks (git shortlog is used from the default update-hook, but 
that can be changed with no trouble at all).

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-08 21:45     ` Junio C Hamano
@ 2006-02-10 22:04       ` Ryan Anderson
  0 siblings, 0 replies; 19+ messages in thread
From: Ryan Anderson @ 2006-02-10 22:04 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Wed, Feb 08, 2006 at 01:45:11PM -0800, Junio C Hamano wrote:
> Ryan Anderson <ryan@michonline.com> writes:
> 
> >> It's been a while since I looked at it the last time so it may
> >> not even work with the current git, but here it is..
> >
> > I'll take a look through this in greater detail later, hopefully your
> > approach can be applied.  Diff-analyzing is apparently tricky.
> 
> Reading diff is tricky but I was lazy to match up the lines by
> hand, which is also a real work ;-).

Reading a diff is tricky, yes, but if you're willing to just throw RAM
at the problem, it might not be quite as bad as I was trying at first.

My current thought on how to get it more correct is this:
	foreach $rev (@revqueue) {
		foreach $parent (@{$revs{$rev}{parents}}) {
			my @templines = @{$revs{$rev}{lines}};

			$revs{$parent}{lines} = apply_diff(\@templines);
		}
	}

The @lines arrays that get built will be entirely made up of hash or
array references, so they just get reused for each successive file.

When apply_diff() deletes a line from the new copy, it should mark that
line as "claimed" by the current rev.

I'm thinking that each element of @lines will look like this:
	{
		text => $text,
		in_original => [0 | 1],
		claimed_by => $rev,
	}
at least to start.

This method can sanity check itself by calling git cat-file and actually
reading in each version of the file, and comparing it against the
generated copy, aborting if we get the two out of sync.

I'll see about implementing something along these lines this weekend,
time permitting.

-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file.
  2006-02-10 11:25       ` Ralf Baechle
  2006-02-10 11:55         ` Andreas Ericsson
@ 2006-02-14 10:51         ` Fredrik Kuivinen
  1 sibling, 0 replies; 19+ messages in thread
From: Fredrik Kuivinen @ 2006-02-14 10:51 UTC (permalink / raw)
  To: Ralf Baechle; +Cc: Junio C Hamano, Johannes Schindelin, Franck Bui-Huu, git

On Fri, Feb 10, 2006 at 11:25:41AM +0000, Ralf Baechle wrote:
> The dependency on Python 2.4 already is a problem for installation on some
> systems ... 

I understand that in the environments where the Python dependency is a
problem it is probably not due to the specific version. However, if
WITH_OWN_SUBPROCESS is defined in the Makefile then Python 2.3 should
work fine too (this is actually automatically detected now, so you
shouldn't have to do anything special to use Python 2.3).

- Fredrik

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2006-02-14 10:51 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-02-08 14:52 [PATCH] Add git-annotate - a tool for annotating files with the revision and person that created each line in the file Ryan Anderson
2006-02-08 15:09 ` Peter Eriksen
2006-02-08 16:05   ` Johannes Schindelin
2006-02-08 16:35 ` Franck Bui-Huu
2006-02-08 17:45   ` Johannes Schindelin
2006-02-08 19:19     ` Junio C Hamano
2006-02-10 11:25       ` Ralf Baechle
2006-02-10 11:55         ` Andreas Ericsson
2006-02-14 10:51         ` Fredrik Kuivinen
2006-02-08 18:47   ` Randal L. Schwartz
2006-02-09  9:19     ` Andreas Ericsson
2006-02-09  9:57       ` Junio C Hamano
2006-02-09 12:04         ` Franck Bui-Huu
2006-02-09 12:47           ` Andreas Ericsson
2006-02-08 19:09 ` Linus Torvalds
2006-02-08 19:51 ` Junio C Hamano
2006-02-08 21:07   ` Ryan Anderson
2006-02-08 21:45     ` Junio C Hamano
2006-02-10 22:04       ` Ryan Anderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).