Git development
 help / color / mirror / Atom feed
* [PATCH] Add git-edit-index.perl
From: Neil Roberts @ 2008-12-17 20:47 UTC (permalink / raw)
  To: git

This script can be used to edit a file in the index without affecting
your working tree. It checkouts a copy of the file to a temporary file
and runs an editor on it. If the editor completes successfully with a
non-empty file then it updates the index with the new data.

This is useful to fine tune the results from git add -p. For example
sometimes your unrelated changes are too close together and
git-add--interactive will refuse to split them up. Using this script
you can add both the changes and later edit the index file to
temporarily remove one of the changes.

Signed-off-by: Neil Roberts <bpeeluk@yahoo.co.uk>
---
 .gitignore          |    1 +
 Makefile            |    1 +
 git-edit-index.perl |   98 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 100 insertions(+), 0 deletions(-)
 create mode 100755 git-edit-index.perl

diff --git a/.gitignore b/.gitignore
index d9adce5..251d90b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ git-diff-files
 git-diff-index
 git-diff-tree
 git-describe
+git-edit-index
 git-fast-export
 git-fast-import
 git-fetch
diff --git a/Makefile b/Makefile
index 5158197..77ee97f 100644
--- a/Makefile
+++ b/Makefile
@@ -275,6 +275,7 @@ SCRIPT_PERL += git-archimport.perl
 SCRIPT_PERL += git-cvsexportcommit.perl
 SCRIPT_PERL += git-cvsimport.perl
 SCRIPT_PERL += git-cvsserver.perl
+SCRIPT_PERL += git-edit-index.perl
 SCRIPT_PERL += git-relink.perl
 SCRIPT_PERL += git-send-email.perl
 SCRIPT_PERL += git-svn.perl
diff --git a/git-edit-index.perl b/git-edit-index.perl
new file mode 100755
index 0000000..a5d9886
--- /dev/null
+++ b/git-edit-index.perl
@@ -0,0 +1,98 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2008 Neil Roberts <bpeeluk@yahoo.co.uk>
+#
+# GPL v2 (See COPYING)
+#
+# Opens an editor on a copy of a file in the index and updates it when
+# the editor is finished. This can be used to fine tune to results of
+# git add -p
+
+use strict;
+use warnings;
+use Git;
+
+sub usage {
+	print <<EOT;
+git edit-index <file>...
+EOT
+	exit(1);
+}
+
+sub delete_temp_files {
+        # Delete the temporary files created by checkout-index
+        # --temp. The output from checkout-index should be passed as
+        # arguments
+        foreach my $fnfull (@_) {
+                my ($tmp_fn, $fn) = split(/\t/, $fnfull);
+                unlink($tmp_fn);
+        }
+}
+
+sub check_file_size {
+        my ($fn) = @_;
+        my ($dev, $ino, $mode, $nlink, $uid, $gid, $rdev, $size,
+            $atime, $mtime, $ctime, $blksize, $blocks) = stat($fn);
+
+        $size;
+}
+
+usage unless @ARGV;
+
+my $repo = Git->repository();
+
+my %file_modes;
+
+my $editor = $ENV{GIT_EDITOR}
+    || $repo->config("core.editor")
+    || $ENV{VISUAL}
+    || $ENV{EDITOR}
+    || "vi";
+
+# Create a temporary copy of each file in the index
+my @file_list = $repo->command(qw(checkout-index --temp --), @ARGV);
+
+# Get the current mode of each file
+foreach my $fnfull (@file_list) {
+        my ($tmp_fn, $fn) = split(/\t/, $fnfull);
+        my ($file_details) = $repo->command_oneline(qw(ls-files --stage --),
+                                                    $fn);
+        unless (defined($file_details) && $file_details =~ /\A([0-7]{6}) /)
+        {
+                delete_temp_files(@file_list);
+                die("$fn is not in the index");
+        }
+
+        $file_modes{$fn} = $1;
+}
+
+# Edit each file
+foreach my $fnfull (@file_list) {
+        my ($tmp_fn, $fn) = split(/\t/, $fnfull);
+
+        unless (system($editor, $tmp_fn) == 0
+                && check_file_size($tmp_fn)) {
+                # If the editor failed, the file has disappeared or it
+                # has zero size then give up
+                delete_temp_files(@file_list);
+                die("Editor failed or file has zero size");
+        }
+}
+
+# Add each file back to the index
+foreach my $fnfull (@file_list) {
+        my ($tmp_fn, $fn) = split(/\t/, $fnfull);
+
+        my $hash = $repo->command_oneline(qw(hash-object -w --), $tmp_fn);
+
+        unless (defined($hash) && $hash =~ /\A[0-9a-f]{40}\z/) {
+                delete_temp_files(@file_list);
+                die("Failed to add new file");
+        }
+
+        $repo->command(qw(update-index --cacheinfo),
+                       $file_modes{$fn}, $hash, $fn);
+}
+
+# Clean up the temporary files
+delete_temp_files(@file_list);
-- 
1.5.6.3

^ permalink raw reply related

* [PATCH 8/5] combine-diff.c: use strbuf_readlink()
From: Junio C Hamano @ 2008-12-17 20:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171044110.14014@localhost.localdomain>

When showing combined diff using work tree contents, use strbuf_readlink()
to read symbolic links.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 combine-diff.c |   10 +++++-----
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git i/combine-diff.c w/combine-diff.c
index ec8df39..bccc018 100644
--- i/combine-diff.c
+++ w/combine-diff.c
@@ -703,15 +703,15 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent,
 			goto deleted_file;
 
 		if (S_ISLNK(st.st_mode)) {
-			size_t len = xsize_t(st.st_size);
-			result_size = len;
-			result = xmalloc(len + 1);
-			if (result_size != readlink(elem->path, result, len)) {
+			struct strbuf buf = STRBUF_INIT;
+
+			if (strbuf_readlink(&buf, elem->path, st.st_size) < 0) {
 				error("readlink(%s): %s", elem->path,
 				      strerror(errno));
 				return;
 			}
-			result[len] = 0;
+			result_size = buf.len;
+			result = strbuf_detach(&buf, NULL);
 			elem->mode = canon_mode(st.st_mode);
 		}
 		else if (0 <= (fd = open(elem->path, O_RDONLY)) &&

^ permalink raw reply related

* [PATCH 6/5] make_absolute_path(): check bounds when seeing an overlong symlink
From: Junio C Hamano @ 2008-12-17 20:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171044110.14014@localhost.localdomain>


Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 abspath.c |    2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git i/abspath.c w/abspath.c
index 8194ce1..649f34f 100644
--- i/abspath.c
+++ w/abspath.c
@@ -64,6 +64,8 @@ const char *make_absolute_path(const char *path)
 			len = readlink(buf, next_buf, PATH_MAX);
 			if (len < 0)
 				die ("Invalid symlink: %s", buf);
+			if (PATH_MAX <= len)
+				die("symbolic link too long: %s", buf);
 			next_buf[len] = '\0';
 			buf = next_buf;
 			buf_index = 1 - buf_index;

^ permalink raw reply related

* [PATCH 7/5] builtin-blame.c: use strbuf_readlink()
From: Junio C Hamano @ 2008-12-17 20:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171044110.14014@localhost.localdomain>

When faking a commit out of the work tree contents, use strbuf_readlink()
to read the contents of symbolic links.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin-blame.c |    5 +----
 1 files changed, 1 insertions(+), 4 deletions(-)

diff --git i/builtin-blame.c w/builtin-blame.c
index a0d6014..aae14ef 100644
--- i/builtin-blame.c
+++ w/builtin-blame.c
@@ -1996,7 +1996,6 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
 	if (!contents_from || strcmp("-", contents_from)) {
 		struct stat st;
 		const char *read_from;
-		unsigned long fin_size;
 
 		if (contents_from) {
 			if (stat(contents_from, &st) < 0)
@@ -2008,7 +2007,6 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
 				die("Cannot lstat %s", path);
 			read_from = path;
 		}
-		fin_size = xsize_t(st.st_size);
 		mode = canon_mode(st.st_mode);
 		switch (st.st_mode & S_IFMT) {
 		case S_IFREG:
@@ -2016,9 +2014,8 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
 				die("cannot open or read %s", read_from);
 			break;
 		case S_IFLNK:
-			if (readlink(read_from, buf.buf, buf.alloc) != fin_size)
+			if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
 				die("cannot readlink %s", read_from);
-			buf.len = fin_size;
 			break;
 		default:
 			die("unsupported file type %s", read_from);

^ permalink raw reply related

* Re: [PATCH 4/5] Make 'diff_populate_filespec()' use the new 'strbuf_readlink()'
From: Junio C Hamano @ 2008-12-17 20:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171043440.14014@localhost.localdomain>

Linus Torvalds <torvalds@linux-foundation.org> writes:

> diff --git a/diff.c b/diff.c
> index afefe08..4b2029c 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -1773,19 +1773,17 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
>  		s->size = xsize_t(st.st_size);
>  		if (!s->size)
>  			goto empty;
> -		if (size_only)
> -			return 0;
>  		if (S_ISLNK(st.st_mode)) {
>  ...
>  		}
> +		if (size_only)
> +			return 0;

It is unfortunate that we need to always readlink even when we only would
want to cull differences early (e.g. --raw without any fancy filters such
as rename detection), but symbolic links should be minorities in any sane
repo, and it should not be worth trying to optimize this for sane
filesystems by making it conditional.

^ permalink raw reply

* Re: [PATCH 3/5] Make 'index_path()' use 'strbuf_readlink()'
From: Junio C Hamano @ 2008-12-17 20:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171043180.14014@localhost.localdomain>

Linus Torvalds <torvalds@linux-foundation.org> writes:

> @@ -2537,20 +2536,17 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
>  				     path);
>  		break;
>  	case S_IFLNK:
> -		len = xsize_t(st->st_size);
> -		target = xmalloc(len + 1);
> -		if (readlink(path, target, len + 1) != st->st_size) {
> +		if (strbuf_readlink(&sb, path, st->st_size)) {
>  			char *errstr = strerror(errno);
> -			free(target);
>  			return error("readlink(\"%s\"): %s", path,
>  			             errstr);

Thanks; as strbuf_readlink() does not do any iffy library calls that would
stomp on errno, the error reporting should still be valid here.

^ permalink raw reply

* Re: Is it possible to roll back unstaged changes while leaving the staged ones for the next commit?
From: Junio C Hamano @ 2008-12-17 20:30 UTC (permalink / raw)
  To: Tim Visher; +Cc: git
In-Reply-To: <c115fd3c0812171157m3d180534gb5630fbcf39b2bbd@mail.gmail.com>

"Tim Visher" <tim.visher@gmail.com> writes:

> My question is this: is it possible to revert the changes that I
> haven't staged so that the file looks as if everything inside of it
> has been staged because none of the stuff I didn't stage is there
> anymore.

The last part of the sentence after "because" does not parse for me at
all, but I think you are after one of the following:

 (1) if you want to get rid of garbage changes in your work tree, you
     would want "git checkout $that_path";

 (2) if you want to temporarily stash away further changes in your work
     tree, because you would want to first test what is staged, commit it,
     and then later continue to refine the changes stashed away thusly,
     you would want "git stash --keep-index".

^ permalink raw reply

* Re: How to maintain private/secret/confidential branch.
From: Daniel Barkalow @ 2008-12-17 20:27 UTC (permalink / raw)
  To: Łukasz Lew; +Cc: Alexander Potashev, Nick Andrew, git
In-Reply-To: <c55009e70812171157s7932c0b3u7a8ee6557c140d56@mail.gmail.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 6752 bytes --]

On Wed, 17 Dec 2008, Łukasz Lew wrote:

> Well, I am still a beginner in git. I just switched from mercurial.
> Some inline follows:
> 
> 2008/12/15 Daniel Barkalow <barkalow@iabervon.org>:
> > On Sun, 14 Dec 2008, Łukasz Lew wrote:
> >
> >> Hi Alexander,
> >>
> >> On Sun, Dec 14, 2008 at 17:06, Alexander Potashev <aspotashev@gmail.com> wrote:
> >> > Hello, Łukasz!
> >> >
> >> > On 16:38 Sun 14 Dec     , Łukasz Lew wrote:
> >> >> Thanks Nick, thats really helpful (and surprisingly simple).
> >> >> I have a couple more questions:
> >> >>
> >> >> On Sun, Dec 14, 2008 at 15:55, Nick Andrew <nick@nick-andrew.net> wrote:
> >> >> > On Sun, Dec 14, 2008 at 02:49:50PM +0100, Łukasz Lew wrote:
> >> >> >> I don't know how to make such a scenario work:
> >> >> >> - two repositories: pub, priv
> >> >> >> - priv is clone/branch of pub
> >> >> >> - there is some constant developement both in pub and priv
> >> >> >> - there are regular syncs with pub in priv
> >> >> >>
> >> >> >> Problem:
> >> >> >> Occasionally I want to push some changes from priv to pub.
> >> >> >> Then after syncing with pub I want to get as few conflicts as possible.
> >> >> >>
> >> >> >> Is it possible to do with git?
> >> >> >
> >> >> > Git can do almost anything. One should instead ask "How to do this
> >> >> > with git?" :-)
> >> >>
> >> >> So I've heard, but not yet experienced it myself. I'm thrilled to try.
> >> >>
> >> >> >
> >> >> > If I understand your problem, you could solve it with git cherry-pick
> >> >> > and rebase. On priv, make a for-public branch from a pub branch. Then
> >> >> > cherry-pick the commits you want from your private branch into the
> >> >> > for-public branch.
> >> >>
> >> >> That almost works. Can I somehow split existing commits just like in git-add -p?
> >> > It's, however, better to make more commits to not experience the need of
> >> > commit splitting.
> >>
> >> Indeed good advice and best practice, but another best practice is to
> >> not commit not compiling state.
> >
> > In your private branches, it's actually good practice to commit all sorts
> > of junk. That way, when you mess up badly while trying to get it to
> > compile, you won't have lost your work. Of course, that means your commits
> > are going to need more cleanup before going public.
> 
> I started to follow your advise.
> Then I rebase -i.
> I found out I need more precise commit messages. :)

One useful strategy is to have a second shell and do "git show <hash>" to 
figure out what you did in that misc commit.

> >> My common scenario is that I code a big change in priv repository, and
> >> after that I find that some of its parts can and should be moved to
> >> pub.
> >
> > I usually end up with my private branch containing the public branch, plus
> > a bunch of commits that introduce: bugs, later fixed; mixed improvements;
> > and debugging cruft. I want to generate nice commits that are individual
> > improvements. I generally do:
> > $ git checkout -b submit origin/master (the first time, to set it up)
> >
> > $ git checkout submit
> > $ git diff submit mixed-work
> > look at it for good changes, find some in file1 and file2
> > $ git diff submit mixed-work -- file1 file2 | git apply
> 
> But with this command we do not preserve objects identity.
> I.e: when you merge with mixed-work you have duplicate changes.
> Is it ok?

Git is very good about recognizing duplicate changes in 3-way situations. 
That is, merging two branches, each of which makes the same change (on a 
hunk level) to a common ancestor. It'll identify this as "the branches 
agree on a change" rather than "the branches conflict". Also, "rebase" 
will try the 3-way merge mechanism, so it will be able to sort this out.

The interesting case is when both branches have the same logical change, 
but one of them is done better than the other. When you merge these, 
you'll have to select the better one by hand in a conflict resolution.

> > Sometimes, clean up bits that aren't ideal
> > $ git add -i
> > Add the good parts
> > $ git checkout . (revert the working tree to the index)
> > $ make test (did I extract the change correctly?)
> > $ git commit
> > Write a good message, sign off, etc
> > $ git checkout mixed-work
> > $ git rebase -i submit
> 
> ... Ah I see, we throw away old commits anyway with rebasing.

Yup. The old commits are there to save us when we make good changes and 
undo them before getting to a finished state. Once we reach a finished 
state, we intend to throw them away.

> > Often, resolve easy conflicts where my mixed-work branch introduced bugs
> > that I fixed later and have now adopted the fixed code
> >
> > Then I repeat until I don't have any more good changes in mixed-work
> > (either I have nothing, only debugging cruft, or only stuff I haven't
> > gotten to work yet). If there's nothing but cruft, I've fully merged the
> > topic, and I delete the branch.
> >
> > Eventually, I'm satisfied with what I've cleaned up, and I do:
> > $ git push origin submit:master
> >
> > Also, I generally have a bunch of "mixed-work" branches, each containing
> > different stuff that isn't ready. I'll periodicly go through all of them
> > and rebase onto "submit" or "origin/master" (or, sometimes, give up on
> > them and delete them).
> >
> > (One thing that would be nice to have is a "git apply --interactive" which
> > applies the user's choice of hunks, like "git add -i" adds them)
> 
> I totally agree.
> 
> I would appriciate rebase --copy option, which doesn't move, but copy
> the changelists like cherry-pick.

There's work in progress on a generalization of "rebase -i" that could be 
seeded with the "cherry-pick" operations instead of the "rebase" 
operations. I think that's what you'd like. On the other hand, remember 
that you can just make a new branch based on your endpoint and rebase it 
on your upstream; there's no reason that you can't "unzip" the history 
past the point where the branch you're modifying was created.

> Then we could use rebase -i (with edit) instead of apply.
> 
> PS
> Why after edit in rebase -i the change is already commited? I always
> have to reset;add -i

There's (currently) no equivalent of the index (storing the contents of 
the commit in progress) for the message (and author info, etc). On the 
other hand, you can use "git commit --amend" to alter the commit on top 
(including the files), and you can do "git diff HEAD HEAD^ | git apply" to 
get reverts into your worktree that you can add (or not add).

The common case for edit, I think, is that things are mostly correct, but 
there's a wrong change; with the change already committed, it's easy to 
change it to what it should be and "git commit -a --amend".

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* [JGIT PATCH 2/2 v2] Add getScriptText functions to obtain the plain-text version of a patch
From: Shawn O. Pearce @ 2008-12-17 20:13 UTC (permalink / raw)
  To: Robin Rosenberg; +Cc: git
In-Reply-To: <200812132226.47815.robin.rosenberg.lists@dewire.com>

The conversion from byte[] to String is performed one file at a time,
in case the patch is a character encoding conversion patch for the
file.  For simplicity we currently assume UTF-8 still as the default
encoding for any content, but eventually we should support using the
.gitattributes encoding property when performing this conversion.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
  Robin Rosenberg <robin.rosenberg.lists@dewire.com> wrote:
  > > For usefulness we must be able to pass the encoding from outside, 
  > > e.g. the encoding Eclipse uses, which often is not UTF-8-
  > 
  > It's even worse. You should probably do the encoding guess on the whole
  > patch, or per file and not per line so make success possible at all. Reading
  > and writing as ISO-8859-1 will always work as that is just padding every
  > byte with NUL on reading and dropping it on writing. I.e. if your convert
  > to char at all...

  So this patch does the "whole file" thing.  But there is a
  fast-path in getScriptText to try and bypass the multiple copies
  we have to make in order to shovel the entire file into the
  CharsetDecoder just to read the patch.  It isn't common to see
  a character set conversion patch, so the fast case of decoding
  the whole patch text at once should happen most of the time.

 .../jgit/patch/testGetText_BothISO88591.patch      |   21 +++
 .../spearce/jgit/patch/testGetText_Convert.patch   |   21 +++
 .../spearce/jgit/patch/testGetText_DiffCc.patch    |   13 ++
 .../spearce/jgit/patch/testGetText_NoBinary.patch  |    4 +
 .../tst/org/spearce/jgit/patch/GetTextTest.java    |  142 ++++++++++++++++++++
 .../org/spearce/jgit/patch/CombinedFileHeader.java |   27 ++++
 .../org/spearce/jgit/patch/CombinedHunkHeader.java |  127 +++++++++++++++++
 .../src/org/spearce/jgit/patch/FileHeader.java     |  116 ++++++++++++++++
 .../src/org/spearce/jgit/patch/HunkHeader.java     |   86 ++++++++++++
 .../src/org/spearce/jgit/util/RawParseUtils.java   |   57 ++++++++-
 10 files changed, 611 insertions(+), 3 deletions(-)
 create mode 100644 org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_BothISO88591.patch
 create mode 100644 org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_Convert.patch
 create mode 100644 org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_DiffCc.patch
 create mode 100644 org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_NoBinary.patch
 create mode 100644 org.spearce.jgit.test/tst/org/spearce/jgit/patch/GetTextTest.java

diff --git a/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_BothISO88591.patch b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_BothISO88591.patch
new file mode 100644
index 0000000..8224fcc
--- /dev/null
+++ b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_BothISO88591.patch
@@ -0,0 +1,21 @@
+diff --git a/X b/X
+index 014ef30..8c80a36 100644
+--- a/X
++++ b/X
+@@ -1,7 +1,7 @@
+ a
+ b
+ c
+-�ngstr�m
++line 4 �ngstr�m
+ d
+ e
+ f
+@@ -13,6 +13,6 @@ k
+ l
+ m
+ n
+-�ngstr�m
++�ngstr�m; line 16
+ o
+ p
diff --git a/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_Convert.patch b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_Convert.patch
new file mode 100644
index 0000000..a43fef5
--- /dev/null
+++ b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_Convert.patch
@@ -0,0 +1,21 @@
+diff --git a/X b/X
+index 014ef30..209db0d 100644
+--- a/X
++++ b/X
+@@ -1,7 +1,7 @@
+ a
+ b
+ c
+-�ngstr�m
++Ångström
+ d
+ e
+ f
+@@ -13,6 +13,6 @@ k
+ l
+ m
+ n
+-�ngstr�m
++Ångström
+ o
+ p
diff --git a/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_DiffCc.patch b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_DiffCc.patch
new file mode 100644
index 0000000..3f74a52
--- /dev/null
+++ b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_DiffCc.patch
@@ -0,0 +1,13 @@
+diff --cc X
+index bdfc9f4,209db0d..474bd69
+--- a/X
++++ b/X
+@@@ -1,7 -1,7 +1,7 @@@
+  a
+--b
+  c
+ +test �ngstr�m
++ Ångström
+  d
+  e
+  f
diff --git a/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_NoBinary.patch b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_NoBinary.patch
new file mode 100644
index 0000000..e4968dc
--- /dev/null
+++ b/org.spearce.jgit.test/tst-rsrc/org/spearce/jgit/patch/testGetText_NoBinary.patch
@@ -0,0 +1,4 @@
+diff --git a/org.spearce.egit.ui/icons/toolbar/fetchd.png b/org.spearce.egit.ui/icons/toolbar/fetchd.png
+new file mode 100644
+index 0000000..4433c54
+Binary files /dev/null and b/org.spearce.egit.ui/icons/toolbar/fetchd.png differ
diff --git a/org.spearce.jgit.test/tst/org/spearce/jgit/patch/GetTextTest.java b/org.spearce.jgit.test/tst/org/spearce/jgit/patch/GetTextTest.java
new file mode 100644
index 0000000..04810be
--- /dev/null
+++ b/org.spearce.jgit.test/tst/org/spearce/jgit/patch/GetTextTest.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2008, Google Inc.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Git Development Community nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.spearce.jgit.patch;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import junit.framework.TestCase;
+
+public class GetTextTest extends TestCase {
+	public void testGetText_BothISO88591() throws IOException {
+		final Charset cs = Charset.forName("ISO-8859-1");
+		final Patch p = parseTestPatchFile();
+		assertTrue(p.getErrors().isEmpty());
+		assertEquals(1, p.getFiles().size());
+		final FileHeader fh = p.getFiles().get(0);
+		assertEquals(2, fh.getHunks().size());
+		assertEquals(readTestPatchFile(cs), fh.getScriptText(cs, cs));
+	}
+
+	public void testGetText_NoBinary() throws IOException {
+		final Charset cs = Charset.forName("ISO-8859-1");
+		final Patch p = parseTestPatchFile();
+		assertTrue(p.getErrors().isEmpty());
+		assertEquals(1, p.getFiles().size());
+		final FileHeader fh = p.getFiles().get(0);
+		assertEquals(0, fh.getHunks().size());
+		assertEquals(readTestPatchFile(cs), fh.getScriptText(cs, cs));
+	}
+
+	public void testGetText_Convert() throws IOException {
+		final Charset csOld = Charset.forName("ISO-8859-1");
+		final Charset csNew = Charset.forName("UTF-8");
+		final Patch p = parseTestPatchFile();
+		assertTrue(p.getErrors().isEmpty());
+		assertEquals(1, p.getFiles().size());
+		final FileHeader fh = p.getFiles().get(0);
+		assertEquals(2, fh.getHunks().size());
+
+		// Read the original file as ISO-8859-1 and fix up the one place
+		// where we changed the character encoding. That makes the exp
+		// string match what we really expect to get back.
+		//
+		String exp = readTestPatchFile(csOld);
+		exp = exp.replace("\303\205ngstr\303\266m", "\u00c5ngstr\u00f6m");
+
+		assertEquals(exp, fh.getScriptText(csOld, csNew));
+	}
+
+	public void testGetText_DiffCc() throws IOException {
+		final Charset csOld = Charset.forName("ISO-8859-1");
+		final Charset csNew = Charset.forName("UTF-8");
+		final Patch p = parseTestPatchFile();
+		assertTrue(p.getErrors().isEmpty());
+		assertEquals(1, p.getFiles().size());
+		final CombinedFileHeader fh = (CombinedFileHeader) p.getFiles().get(0);
+		assertEquals(1, fh.getHunks().size());
+
+		// Read the original file as ISO-8859-1 and fix up the one place
+		// where we changed the character encoding. That makes the exp
+		// string match what we really expect to get back.
+		//
+		String exp = readTestPatchFile(csOld);
+		exp = exp.replace("\303\205ngstr\303\266m", "\u00c5ngstr\u00f6m");
+
+		assertEquals(exp, fh
+				.getScriptText(new Charset[] { csNew, csOld, csNew }));
+	}
+
+	private Patch parseTestPatchFile() throws IOException {
+		final String patchFile = getName() + ".patch";
+		final InputStream in = getClass().getResourceAsStream(patchFile);
+		if (in == null) {
+			fail("No " + patchFile + " test vector");
+			return null; // Never happens
+		}
+		try {
+			final Patch p = new Patch();
+			p.parse(in);
+			return p;
+		} finally {
+			in.close();
+		}
+	}
+
+	private String readTestPatchFile(final Charset cs) throws IOException {
+		final String patchFile = getName() + ".patch";
+		final InputStream in = getClass().getResourceAsStream(patchFile);
+		if (in == null) {
+			fail("No " + patchFile + " test vector");
+			return null; // Never happens
+		}
+		try {
+			final InputStreamReader r = new InputStreamReader(in, cs);
+			char[] tmp = new char[2048];
+			final StringBuilder s = new StringBuilder();
+			int n;
+			while ((n = r.read(tmp)) > 0)
+				s.append(tmp, 0, n);
+			return s.toString();
+		} finally {
+			in.close();
+		}
+	}
+}
diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedFileHeader.java b/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedFileHeader.java
index 3ccc418..a27e0f8 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedFileHeader.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedFileHeader.java
@@ -41,7 +41,9 @@
 import static org.spearce.jgit.util.RawParseUtils.match;
 import static org.spearce.jgit.util.RawParseUtils.nextLF;
 
+import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 
 import org.spearce.jgit.lib.AbbreviatedObjectId;
@@ -111,6 +113,31 @@ public AbbreviatedObjectId getOldId(final int nthParent) {
 		return oldIds[nthParent];
 	}
 
+	@Override
+	public String getScriptText(final Charset ocs, final Charset ncs) {
+		final Charset[] cs = new Charset[getParentCount() + 1];
+		Arrays.fill(cs, ocs);
+		cs[getParentCount()] = ncs;
+		return getScriptText(cs);
+	}
+
+	/**
+	 * Convert the patch script for this file into a string.
+	 * 
+	 * @param charsetGuess
+	 *            optional array to suggest the character set to use when
+	 *            decoding each file's line. If supplied the array must have a
+	 *            length of <code>{@link #getParentCount()} + 1</code>
+	 *            representing the old revision character sets and the new
+	 *            revision character set.
+	 * @return the patch script, as a Unicode string.
+	 */
+	@Override
+	public String getScriptText(final Charset[] charsetGuess) {
+		return super.getScriptText(charsetGuess);
+	}
+
+	@Override
 	int parseGitHeaders(int ptr, final int end) {
 		while (ptr < end) {
 			final int eol = nextLF(buf, ptr);
diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedHunkHeader.java b/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedHunkHeader.java
index 3e5c465..83ea681 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedHunkHeader.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/patch/CombinedHunkHeader.java
@@ -40,6 +40,9 @@
 import static org.spearce.jgit.util.RawParseUtils.nextLF;
 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
 
+import java.io.IOException;
+import java.io.OutputStream;
+
 import org.spearce.jgit.lib.AbbreviatedObjectId;
 import org.spearce.jgit.util.MutableInteger;
 
@@ -188,4 +191,128 @@ int parseBody(final Patch script, final int end) {
 
 		return c;
 	}
+
+	@Override
+	void extractFileLines(final OutputStream[] out) throws IOException {
+		final byte[] buf = file.buf;
+		int ptr = startOffset;
+		int eol = nextLF(buf, ptr);
+		if (endOffset <= eol)
+			return;
+
+		// Treat the hunk header as though it were from the ancestor,
+		// as it may have a function header appearing after it which
+		// was copied out of the ancestor file.
+		//
+		out[0].write(buf, ptr, eol - ptr);
+
+		SCAN: for (ptr = eol; ptr < endOffset; ptr = eol) {
+			eol = nextLF(buf, ptr);
+
+			if (eol - ptr < old.length + 1) {
+				// Line isn't long enough to mention the state of each
+				// ancestor. It must be the end of the hunk.
+				break SCAN;
+			}
+
+			switch (buf[ptr]) {
+			case ' ':
+			case '-':
+			case '+':
+				break;
+
+			default:
+				// Line can't possibly be part of this hunk; the first
+				// ancestor information isn't recognizable.
+				//
+				break SCAN;
+			}
+
+			int delcnt = 0;
+			for (int ancestor = 0; ancestor < old.length; ancestor++) {
+				switch (buf[ptr + ancestor]) {
+				case '-':
+					delcnt++;
+					out[ancestor].write(buf, ptr, eol - ptr);
+					continue;
+
+				case ' ':
+					out[ancestor].write(buf, ptr, eol - ptr);
+					continue;
+
+				case '+':
+					continue;
+
+				default:
+					break SCAN;
+				}
+			}
+			if (delcnt < old.length) {
+				// This line appears in the new file if it wasn't deleted
+				// relative to all ancestors.
+				//
+				out[old.length].write(buf, ptr, eol - ptr);
+			}
+		}
+	}
+
+	void extractFileLines(final StringBuilder sb, final String[] text,
+			final int[] offsets) {
+		final byte[] buf = file.buf;
+		int ptr = startOffset;
+		int eol = nextLF(buf, ptr);
+		if (endOffset <= eol)
+			return;
+		copyLine(sb, text, offsets, 0);
+		SCAN: for (ptr = eol; ptr < endOffset; ptr = eol) {
+			eol = nextLF(buf, ptr);
+
+			if (eol - ptr < old.length + 1) {
+				// Line isn't long enough to mention the state of each
+				// ancestor. It must be the end of the hunk.
+				break SCAN;
+			}
+
+			switch (buf[ptr]) {
+			case ' ':
+			case '-':
+			case '+':
+				break;
+
+			default:
+				// Line can't possibly be part of this hunk; the first
+				// ancestor information isn't recognizable.
+				//
+				break SCAN;
+			}
+
+			boolean copied = false;
+			for (int ancestor = 0; ancestor < old.length; ancestor++) {
+				switch (buf[ptr + ancestor]) {
+				case ' ':
+				case '-':
+					if (copied)
+						skipLine(text, offsets, ancestor);
+					else {
+						copyLine(sb, text, offsets, ancestor);
+						copied = true;
+					}
+					continue;
+
+				case '+':
+					continue;
+
+				default:
+					break SCAN;
+				}
+			}
+			if (!copied) {
+				// If none of the ancestors caused the copy then this line
+				// must be new across the board, so it only appears in the
+				// text of the new file.
+				//
+				copyLine(sb, text, offsets, old.length);
+			}
+		}
+	}
 }
diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java b/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java
index c91f80e..66c785f 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/patch/FileHeader.java
@@ -39,10 +39,15 @@
 
 import static org.spearce.jgit.lib.Constants.encodeASCII;
 import static org.spearce.jgit.util.RawParseUtils.decode;
+import static org.spearce.jgit.util.RawParseUtils.decodeNoFallback;
+import static org.spearce.jgit.util.RawParseUtils.extractBinaryString;
 import static org.spearce.jgit.util.RawParseUtils.match;
 import static org.spearce.jgit.util.RawParseUtils.nextLF;
 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
 
+import java.io.IOException;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
@@ -51,6 +56,8 @@
 import org.spearce.jgit.lib.Constants;
 import org.spearce.jgit.lib.FileMode;
 import org.spearce.jgit.util.QuotedString;
+import org.spearce.jgit.util.RawParseUtils;
+import org.spearce.jgit.util.TemporaryBuffer;
 
 /** Patch header describing an action for a single file path. */
 public class FileHeader {
@@ -189,6 +196,115 @@ public int getEndOffset() {
 	}
 
 	/**
+	 * Convert the patch script for this file into a string.
+	 * <p>
+	 * The default character encoding ({@link Constants#CHARSET}) is assumed for
+	 * both the old and new files.
+	 * 
+	 * @return the patch script, as a Unicode string.
+	 */
+	public String getScriptText() {
+		return getScriptText(null, null);
+	}
+
+	/**
+	 * Convert the patch script for this file into a string.
+	 * 
+	 * @param oldCharset
+	 *            hint character set to decode the old lines with.
+	 * @param newCharset
+	 *            hint character set to decode the new lines with.
+	 * @return the patch script, as a Unicode string.
+	 */
+	public String getScriptText(Charset oldCharset, Charset newCharset) {
+		return getScriptText(new Charset[] { oldCharset, newCharset });
+	}
+
+	protected String getScriptText(Charset[] charsetGuess) {
+		if (getHunks().isEmpty()) {
+			// If we have no hunks then we can safely assume the entire
+			// patch is a binary style patch, or a meta-data only style
+			// patch. Either way the encoding of the headers should be
+			// strictly 7-bit US-ASCII and the body is either 7-bit ASCII
+			// (due to the base 85 encoding used for a BinaryHunk) or is
+			// arbitrary noise we have chosen to ignore and not understand
+			// (e.g. the message "Binary files ... differ").
+			//
+			return extractBinaryString(buf, startOffset, endOffset);
+		}
+
+		if (charsetGuess != null && charsetGuess.length != getParentCount() + 1)
+			throw new IllegalArgumentException("Expected "
+					+ (getParentCount() + 1) + " character encoding guesses");
+
+		if (trySimpleConversion(charsetGuess)) {
+			Charset cs = charsetGuess != null ? charsetGuess[0] : null;
+			if (cs == null)
+				cs = Constants.CHARSET;
+			try {
+				return decodeNoFallback(cs, buf, startOffset, endOffset);
+			} catch (CharacterCodingException cee) {
+				// Try the much slower, more-memory intensive version which
+				// can handle a character set conversion patch.
+			}
+		}
+
+		final StringBuilder r = new StringBuilder(endOffset - startOffset);
+
+		// Always treat the headers as US-ASCII; Git file names are encoded
+		// in a C style escape if any character has the high-bit set.
+		//
+		final int hdrEnd = getHunks().get(0).getStartOffset();
+		for (int ptr = startOffset; ptr < hdrEnd;) {
+			final int eol = Math.min(hdrEnd, nextLF(buf, ptr));
+			r.append(extractBinaryString(buf, ptr, eol));
+			ptr = eol;
+		}
+
+		final String[] files = extractFileLines(charsetGuess);
+		final int[] offsets = new int[files.length];
+		for (final HunkHeader h : getHunks())
+			h.extractFileLines(r, files, offsets);
+		return r.toString();
+	}
+
+	private static boolean trySimpleConversion(final Charset[] charsetGuess) {
+		if (charsetGuess == null)
+			return true;
+		for (int i = 1; i < charsetGuess.length; i++) {
+			if (charsetGuess[i] != charsetGuess[0])
+				return false;
+		}
+		return true;
+	}
+
+	private String[] extractFileLines(final Charset[] csGuess) {
+		final TemporaryBuffer[] tmp = new TemporaryBuffer[getParentCount() + 1];
+		try {
+			for (int i = 0; i < tmp.length; i++)
+				tmp[i] = new TemporaryBuffer();
+			for (final HunkHeader h : getHunks())
+				h.extractFileLines(tmp);
+
+			final String[] r = new String[tmp.length];
+			for (int i = 0; i < tmp.length; i++) {
+				Charset cs = csGuess != null ? csGuess[i] : null;
+				if (cs == null)
+					cs = Constants.CHARSET;
+				r[i] = RawParseUtils.decode(cs, tmp[i].toByteArray());
+			}
+			return r;
+		} catch (IOException ioe) {
+			throw new RuntimeException("Cannot convert script to text", ioe);
+		} finally {
+			for (final TemporaryBuffer b : tmp) {
+				if (b != null)
+					b.destroy();
+			}
+		}
+	}
+
+	/**
 	 * Get the old name associated with this file.
 	 * <p>
 	 * The meaning of the old name can differ depending on the semantic meaning
diff --git a/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java b/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java
index 12c670d..fc30311 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/patch/HunkHeader.java
@@ -41,6 +41,9 @@
 import static org.spearce.jgit.util.RawParseUtils.nextLF;
 import static org.spearce.jgit.util.RawParseUtils.parseBase10;
 
+import java.io.IOException;
+import java.io.OutputStream;
+
 import org.spearce.jgit.lib.AbbreviatedObjectId;
 import org.spearce.jgit.util.MutableInteger;
 
@@ -240,4 +243,87 @@ int parseBody(final Patch script, final int end) {
 
 		return c;
 	}
+
+	void extractFileLines(final OutputStream[] out) throws IOException {
+		final byte[] buf = file.buf;
+		int ptr = startOffset;
+		int eol = nextLF(buf, ptr);
+		if (endOffset <= eol)
+			return;
+
+		// Treat the hunk header as though it were from the ancestor,
+		// as it may have a function header appearing after it which
+		// was copied out of the ancestor file.
+		//
+		out[0].write(buf, ptr, eol - ptr);
+
+		SCAN: for (ptr = eol; ptr < endOffset; ptr = eol) {
+			eol = nextLF(buf, ptr);
+			switch (buf[ptr]) {
+			case ' ':
+			case '\n':
+			case '\\':
+				out[0].write(buf, ptr, eol - ptr);
+				out[1].write(buf, ptr, eol - ptr);
+				break;
+			case '-':
+				out[0].write(buf, ptr, eol - ptr);
+				break;
+			case '+':
+				out[1].write(buf, ptr, eol - ptr);
+				break;
+			default:
+				break SCAN;
+			}
+		}
+	}
+
+	void extractFileLines(final StringBuilder sb, final String[] text,
+			final int[] offsets) {
+		final byte[] buf = file.buf;
+		int ptr = startOffset;
+		int eol = nextLF(buf, ptr);
+		if (endOffset <= eol)
+			return;
+		copyLine(sb, text, offsets, 0);
+		SCAN: for (ptr = eol; ptr < endOffset; ptr = eol) {
+			eol = nextLF(buf, ptr);
+			switch (buf[ptr]) {
+			case ' ':
+			case '\n':
+			case '\\':
+				copyLine(sb, text, offsets, 0);
+				skipLine(text, offsets, 1);
+				break;
+			case '-':
+				copyLine(sb, text, offsets, 0);
+				break;
+			case '+':
+				copyLine(sb, text, offsets, 1);
+				break;
+			default:
+				break SCAN;
+			}
+		}
+	}
+
+	protected void copyLine(final StringBuilder sb, final String[] text,
+			final int[] offsets, final int fileIdx) {
+		final String s = text[fileIdx];
+		final int start = offsets[fileIdx];
+		int end = s.indexOf('\n', start);
+		if (end < 0)
+			end = s.length();
+		else
+			end++;
+		sb.append(s, start, end);
+		offsets[fileIdx] = end;
+	}
+
+	protected void skipLine(final String[] text, final int[] offsets,
+			final int fileIdx) {
+		final String s = text[fileIdx];
+		final int end = s.indexOf('\n', offsets[fileIdx]);
+		offsets[fileIdx] = end < 0 ? s.length() : end + 1;
+	}
 }
diff --git a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
index 55a3001..ff89e9e 100644
--- a/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
+++ b/org.spearce.jgit/src/org/spearce/jgit/util/RawParseUtils.java
@@ -472,6 +472,40 @@ public static String decode(final Charset cs, final byte[] buffer) {
 	 */
 	public static String decode(final Charset cs, final byte[] buffer,
 			final int start, final int end) {
+		try {
+			return decodeNoFallback(cs, buffer, start, end);
+		} catch (CharacterCodingException e) {
+			// Fall back to an ISO-8859-1 style encoding. At least all of
+			// the bytes will be present in the output.
+			//
+			return extractBinaryString(buffer, start, end);
+		}
+	}
+
+	/**
+	 * Decode a region of the buffer under the specified character set if
+	 * possible.
+	 * 
+	 * If the byte stream cannot be decoded that way, the platform default is
+	 * tried and if that too fails, an exception is thrown.
+	 * 
+	 * @param cs
+	 *            character set to use when decoding the buffer.
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @param start
+	 *            first position within the buffer to take data from.
+	 * @param end
+	 *            one position past the last location within the buffer to take
+	 *            data from.
+	 * @return a string representation of the range <code>[start,end)</code>,
+	 *         after decoding the region through the specified character set.
+	 * @throws CharacterCodingException
+	 *             the input is not in any of the tested character sets.
+	 */
+	public static String decodeNoFallback(final Charset cs,
+			final byte[] buffer, final int start, final int end)
+			throws CharacterCodingException {
 		final ByteBuffer b = ByteBuffer.wrap(buffer, start, end - start);
 		b.mark();
 
@@ -508,9 +542,26 @@ public static String decode(final Charset cs, final byte[] buffer,
 			}
 		}
 
-		// Fall back to an ISO-8859-1 style encoding. At least all of
-		// the bytes will be present in the output.
-		//
+		throw new CharacterCodingException();
+	}
+
+	/**
+	 * Decode a region of the buffer under the ISO-8859-1 encoding.
+	 * 
+	 * Each byte is treated as a single character in the 8859-1 character
+	 * encoding, performing a raw binary->char conversion.
+	 * 
+	 * @param buffer
+	 *            buffer to pull raw bytes from.
+	 * @param start
+	 *            first position within the buffer to take data from.
+	 * @param end
+	 *            one position past the last location within the buffer to take
+	 *            data from.
+	 * @return a string representation of the range <code>[start,end)</code>.
+	 */
+	public static String extractBinaryString(final byte[] buffer,
+			final int start, final int end) {
 		final StringBuilder r = new StringBuilder(end - start);
 		for (int i = start; i < end; i++)
 			r.append((char) (buffer[i] & 0xff));
-- 
1.6.1.rc3.302.gb14d9

-- 
Shawn.

^ permalink raw reply related

* Re: white spaces in a patch
From: Matthieu Moy @ 2008-12-17 20:02 UTC (permalink / raw)
  To: Mark Ryden; +Cc: Thomas Jarosch, Sverre Rabbelier, Junio C Hamano, git
In-Reply-To: <dac45060812170422yc259d39vd1b198c1530a40a5@mail.gmail.com>

"Mark Ryden" <markryde@gmail.com> writes:

> Thnks!
> In fact, the first line was enough!
> git config --global color.diff auto

Yes, but you may appreciate color in other commands (log,
status, ...).  Then, "color.ui = auto" is your friend.

-- 
Matthieu

^ permalink raw reply

* Re: Announcement: Git Extensions stable (windows shell extensions)
From: Tim Visher @ 2008-12-17 20:04 UTC (permalink / raw)
  To: Henk; +Cc: git
In-Reply-To: <1229540813648-1669264.post@n2.nabble.com>

On Wed, Dec 17, 2008 at 2:06 PM, Henk <henk_westhuis@hotmail.com> wrote:
>
> This is a shameless announcement of my latest personal project; Git
> Extensions. Git Extensions is a Tortoise-like windows shell extension for
> git. Yesterday I finished version 0.9, the first stable release. I included
> about all git commands I know about, so I think it is pretty complete but
> I'm open to suggestions.

Nice!  With all of this TortoiseGit-like activity we're bound to have
something pretty usable pretty quickly! :)  I'll have to check out the
sources soon and see if I can get involved.  Maybe any of the
developers who were planning on working on TortoiseGit could
officially move over to Git Extensions since it seems to be much
further along?

-- 

In Christ,

Timmy V.

http://burningones.com/
http://five.sentenc.es/ - Spend less time on e-mail

^ permalink raw reply

* Re: How to maintain private/secret/confidential branch.
From: Łukasz Lew @ 2008-12-17 19:57 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: Alexander Potashev, Nick Andrew, git
In-Reply-To: <alpine.LNX.1.00.0812151501570.19665@iabervon.org>

Well, I am still a beginner in git. I just switched from mercurial.
Some inline follows:

2008/12/15 Daniel Barkalow <barkalow@iabervon.org>:
> On Sun, 14 Dec 2008, Łukasz Lew wrote:
>
>> Hi Alexander,
>>
>> On Sun, Dec 14, 2008 at 17:06, Alexander Potashev <aspotashev@gmail.com> wrote:
>> > Hello, Łukasz!
>> >
>> > On 16:38 Sun 14 Dec     , Łukasz Lew wrote:
>> >> Thanks Nick, thats really helpful (and surprisingly simple).
>> >> I have a couple more questions:
>> >>
>> >> On Sun, Dec 14, 2008 at 15:55, Nick Andrew <nick@nick-andrew.net> wrote:
>> >> > On Sun, Dec 14, 2008 at 02:49:50PM +0100, Łukasz Lew wrote:
>> >> >> I don't know how to make such a scenario work:
>> >> >> - two repositories: pub, priv
>> >> >> - priv is clone/branch of pub
>> >> >> - there is some constant developement both in pub and priv
>> >> >> - there are regular syncs with pub in priv
>> >> >>
>> >> >> Problem:
>> >> >> Occasionally I want to push some changes from priv to pub.
>> >> >> Then after syncing with pub I want to get as few conflicts as possible.
>> >> >>
>> >> >> Is it possible to do with git?
>> >> >
>> >> > Git can do almost anything. One should instead ask "How to do this
>> >> > with git?" :-)
>> >>
>> >> So I've heard, but not yet experienced it myself. I'm thrilled to try.
>> >>
>> >> >
>> >> > If I understand your problem, you could solve it with git cherry-pick
>> >> > and rebase. On priv, make a for-public branch from a pub branch. Then
>> >> > cherry-pick the commits you want from your private branch into the
>> >> > for-public branch.
>> >>
>> >> That almost works. Can I somehow split existing commits just like in git-add -p?
>> > It's, however, better to make more commits to not experience the need of
>> > commit splitting.
>>
>> Indeed good advice and best practice, but another best practice is to
>> not commit not compiling state.
>
> In your private branches, it's actually good practice to commit all sorts
> of junk. That way, when you mess up badly while trying to get it to
> compile, you won't have lost your work. Of course, that means your commits
> are going to need more cleanup before going public.

I started to follow your advise.
Then I rebase -i.
I found out I need more precise commit messages. :)

>
>> My common scenario is that I code a big change in priv repository, and
>> after that I find that some of its parts can and should be moved to
>> pub.
>
> I usually end up with my private branch containing the public branch, plus
> a bunch of commits that introduce: bugs, later fixed; mixed improvements;
> and debugging cruft. I want to generate nice commits that are individual
> improvements. I generally do:
> $ git checkout -b submit origin/master (the first time, to set it up)
>
> $ git checkout submit
> $ git diff submit mixed-work
> look at it for good changes, find some in file1 and file2
> $ git diff submit mixed-work -- file1 file2 | git apply

But with this command we do not preserve objects identity.
I.e: when you merge with mixed-work you have duplicate changes.
Is it ok?

> Sometimes, clean up bits that aren't ideal
> $ git add -i
> Add the good parts
> $ git checkout . (revert the working tree to the index)
> $ make test (did I extract the change correctly?)
> $ git commit
> Write a good message, sign off, etc
> $ git checkout mixed-work
> $ git rebase -i submit

... Ah I see, we throw away old commits anyway with rebasing.

> Often, resolve easy conflicts where my mixed-work branch introduced bugs
> that I fixed later and have now adopted the fixed code
>
> Then I repeat until I don't have any more good changes in mixed-work
> (either I have nothing, only debugging cruft, or only stuff I haven't
> gotten to work yet). If there's nothing but cruft, I've fully merged the
> topic, and I delete the branch.
>
> Eventually, I'm satisfied with what I've cleaned up, and I do:
> $ git push origin submit:master
>
> Also, I generally have a bunch of "mixed-work" branches, each containing
> different stuff that isn't ready. I'll periodicly go through all of them
> and rebase onto "submit" or "origin/master" (or, sometimes, give up on
> them and delete them).
>
> (One thing that would be nice to have is a "git apply --interactive" which
> applies the user's choice of hunks, like "git add -i" adds them)

I totally agree.

I would appriciate rebase --copy option, which doesn't move, but copy
the changelists like cherry-pick.
Then we could use rebase -i (with edit) instead of apply.

PS
Why after edit in rebase -i the change is already commited? I always
have to reset;add -i

>
>        -Daniel
> *This .sig left intentionally blank*

^ permalink raw reply

* Is it possible to roll back unstaged changes while leaving the staged ones for the next commit?
From: Tim Visher @ 2008-12-17 19:57 UTC (permalink / raw)
  To: git

Hello Everyone,

I'm attempting to use `git add -i` to help me resolve the differences
between two files in a way that makes sense (leaving class names, some
logic, but changing other things like white space, etc.).  I have all
of the changes that I want to be committed staged now in the index.
My question is this: is it possible to revert the changes that I
haven't staged so that the file looks as if everything inside of it
has been staged because none of the stuff I didn't stage is there
anymore.

Thanks in advance!

-- 

In Christ,

Timmy V.

http://burningones.com/
http://five.sentenc.es/ - Spend less time on e-mail

^ permalink raw reply

* Re: [PATCH] Documentation: fix description for enabling hooks
From: Markus Heidelberg @ 2008-12-17 19:55 UTC (permalink / raw)
  To: Miklos Vajna; +Cc: Johannes Sixt, gitster, git
In-Reply-To: <20081217143627.GB5691@genesis.frugalware.org>

Miklos Vajna, 17.12.2008:
> On Wed, Dec 17, 2008 at 08:44:40AM +0100, Johannes Sixt <j.sixt@viscovery.net> wrote:
> > > This is true, but having the executable bit is necessary as well. I
> > > think it would be better to just append this requirement instead of
> > > replacing the old one with this.
> > 
> > Markus's proposed new wording is correct because the .sample hooks *are*
> > already executable.
> 
> I thought about the following situation: The user reads the
> documentation while working in an older repo (initialized a few versions
> ago). S/he sees that the .sample suffix is already missing, so s/he
> assumes that the hook is already active. Which is not true, because the
> +x bit is missing.

Valid point, I think, but not critical in this case, since the patch
only affected gitrepository-layout(5) and gitglossary(7).

When you want to learn how to use hooks, you will probably rather read
githooks(5), where the need for the executable bit is not even
explicitly mentioned. Maybe it should be added there?

Markus

^ permalink raw reply

* Re: Announcement: Git Extensions stable (windows shell extensions)
From: tekkub @ 2008-12-17 19:51 UTC (permalink / raw)
  To: git
In-Reply-To: <1229540813648-1669264.post@n2.nabble.com>


Very nice, however I ran into the following error while trying to install:
http://www.flickr.com/photos/26681170@N03/3115836831/

Can't wait to try it out!
--tek


Henk wrote:
> 
> This is a shameless announcement of my latest personal project; Git
> Extensions. Git Extensions is a Tortoise-like windows shell extension for
> git. Yesterday I finished version 0.9, the first stable release. I
> included about all git commands I know about, so I think it is pretty
> complete but I'm open to suggestions. 
>  
> It is written mostly in C#, except for shell extension part which is
> written in C++. The project is open source, the sources can be found on
> GitHub. In case there is someone interrested in the sources, be warned;
> the sources are not very well documented yet and the solution is a still
> bit messy, I will clean this up very soon. 
>  
> Main features
> - Shell extensions
> - Visual studio plugin
> - Seperate git application
>  
> Features:
> - Browse repository (incl. visual graph)
> - Add files
> - Apply patch
> - Checkout branch/revision
> - Cherry pick
> - Create branch/tag
> - Delete branch/tag
> - Clone
> - Commit
> - Create (format) patch
> - Init new repository
> - Merge branches
> - Pull
> - Push
> - Run mergetool
> - Stash
> - View differences
>  
> Information about the project and a installer package can be found here:
> http://sourceforge.net/projects/gitextensions/
> The installation requires msysgit to be installed AND git.exe to in the
> system path.
> 
> ps.
> I know there is a TortoiseGit project also, I just didn't know about that
> at the time I started. If I knew about TortoiseGit, I probably never
> started writing my own tool.
> 

-- 
View this message in context: http://n2.nabble.com/Announcement%3A-Git-Extensions-stable-%28windows-shell-extensions%29-tp1669264p1669467.html
Sent from the git mailing list archive at Nabble.com.

^ permalink raw reply

* Re: Can I forbid somebody to pull some branch or tag from my repo with git protocol?
From: Junio C Hamano @ 2008-12-17 19:36 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Emily Ren, git
In-Reply-To: <alpine.DEB.1.00.0812171322330.28560@intel-tinevez-2-302>

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> On Wed, 17 Dec 2008, Emily Ren wrote:
>
>> I want some group can pull these branches or tags from my repo, while
>> other's can't, Need I maintain two repositories ?
>
> Either that (that would be the easy method, and also the proper one, since 
> people would not even know what you hide), but you could patch 
> upload-pack so that it runs a hook with the rev-list arguments in 
> do_rev_list() in upload-pack.c, and die() if the hook returns non-zero.

I do not think that would work very well as you expect.  Two branches can
be pointing at the same commit, and Emily may want to hide one but not the
other.  The time you obtain from "want" is too late.

If you were to extend upload-pack, the place to narrow would be the
initial "here are the refs and the objects they point at" announcement
that is done at the very beginning.  You would do something like the
pseudo patch attached at the end.

read_set_of_exposed_refs_from_hook() should return, depending on who the
user is (which is obviously not available if this connection is over the
anonymous git-daemon service, but local and usual ssh connection you could
do whoami, and on gitosis there would be some environment variable to
distinguish who you are that you can use), the set of refs that the user
is allowed to see.

diff --git i/upload-pack.c w/upload-pack.c
index e5adbc0..129aa1e 100644
--- i/upload-pack.c
+++ w/upload-pack.c
@@ -10,6 +10,10 @@
 #include "revision.h"
 #include "list-objects.h"
 #include "run-command.h"
+#include "string-list.h"
+
+static int use_ref_limiting;
+static struct string_list exposed_refs;
 
 static const char upload_pack_usage[] = "git-upload-pack [--strict] [--timeout=nn] <dir>";
 
@@ -574,8 +578,14 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo
 	static const char *capabilities = "multi_ack thin-pack side-band"
 		" side-band-64k ofs-delta shallow no-progress"
 		" include-tag";
-	struct object *o = parse_object(sha1);
+	struct object *o;
+
+	if (use_ref_limiting && !string_list_has_string(&exposed_refs, refname)) {
+		/* The downloader is not allowed to know the presense of this ref */
+		return 0;
+	}
 
+	o = parse_object(sha1);
 	if (!o)
 		die("git upload-pack: cannot find object %s:", sha1_to_hex(sha1));
 
@@ -600,6 +610,12 @@ static int send_ref(const char *refname, const unsigned char *sha1, int flag, vo
 static void upload_pack(void)
 {
 	reset_timeout();
+
+	if ("limit exposed refs" hook is available) {
+		use_ref_limiting = 1;
+		read_set_of_exposed_refs_from_hook(&exposed_refs);
+	}
+
 	head_ref(send_ref, NULL);
 	for_each_ref(send_ref, NULL);
 	packet_flush(1);

^ permalink raw reply related

* Re: Announcement: Git Extensions stable (windows shell extensions)
From: Sverre Rabbelier @ 2008-12-17 19:23 UTC (permalink / raw)
  To: Henk; +Cc: git
In-Reply-To: <1229540813648-1669264.post@n2.nabble.com>

On Wed, Dec 17, 2008 at 20:06, Henk <henk_westhuis@hotmail.com> wrote:
> I know there is a TortoiseGit project also, I just didn't know about that at
> the time I started. If I knew about TortoiseGit, I probably never started
> writing my own tool.

It seems TortoiseGit started only recently and is not nearly as
feature-complete as Git Extensions, so I wouldn't feel too bad in that
regard ;).

-- 
Cheers,

Sverre Rabbelier

^ permalink raw reply

* Re: Git Notes idea.
From: Junio C Hamano @ 2008-12-17 19:20 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Jeff King, Govind Salinas, Git Mailing List
In-Reply-To: <alpine.DEB.1.00.0812171233270.28560@intel-tinevez-2-302>

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> ...  But the main point 
> still stands: you go from commit to note, not from note to commit.  And 
> this is in stark contrast to tags, where you go from tag to commit, _not_ 
> from commit to tag.
>
> That is a fundamental _difference_ between tags and notes, so that I 
> refuse to accept the notion of notes being a generalized form of tags.

Hmm, how would you explain things like "git describe" (and "name-rev")?

^ permalink raw reply

* Announcement: Git Extensions stable (windows shell extensions)
From: Henk @ 2008-12-17 19:06 UTC (permalink / raw)
  To: git


This is a shameless announcement of my latest personal project; Git
Extensions. Git Extensions is a Tortoise-like windows shell extension for
git. Yesterday I finished version 0.9, the first stable release. I included
about all git commands I know about, so I think it is pretty complete but
I'm open to suggestions. 
 
It is written mostly in C#, except for shell extension part which is written
in C++. The project is open source, the sources can be found on GitHub. In
case there is someone interrested in the sources, be warned; the sources are
not very well documented yet and the solution is a still bit messy, I will
clean this up very soon. 
 
Main features
- Shell extensions
- Visual studio plugin
- Seperate git application
 
Features:
- Browse repository (incl. visual graph)
- Add files
- Apply patch
- Checkout branch/revision
- Cherry pick
- Create branch/tag
- Delete branch/tag
- Clone
- Commit
- Create (format) patch
- Init new repository
- Merge branches
- Pull
- Push
- Run mergetool
- Stash
- View differences
 
Information about the project and a installer package can be found here:
http://sourceforge.net/projects/gitextensions/
The installation requires msysgit to be installed AND git.exe to in the
system path.

ps.
I know there is a TortoiseGit project also, I just didn't know about that at
the time I started. If I knew about TortoiseGit, I probably never started
writing my own tool.
-- 
View this message in context: http://n2.nabble.com/Announcement%3A-Git-Extensions-stable-%28windows-shell-extensions%29-tp1669264p1669264.html
Sent from the git mailing list archive at Nabble.com.

^ permalink raw reply

* [PATCH 5/5] Make 'prepare_temp_file()' ignore st_size for symlinks
From: Linus Torvalds @ 2008-12-17 18:45 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171043440.14014@localhost.localdomain>


From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 17 Dec 2008 10:31:36 -0800

The code was already set up to not really need it, so this just massages
it a bit to remove the use entirely.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---

There's a few raw 'readlink()' calls left, but they all seem to just have 
a big buffer and rely on the return value of readlink() rather than look 
too closely at st_size. But it would probably be good if somebody 
double-checked it all. 

Even so, this should all be better than what we used to have, though.

 diff.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 4b2029c..f160c1a 100644
--- a/diff.c
+++ b/diff.c
@@ -1881,13 +1881,12 @@ static void prepare_temp_file(const char *name,
 		if (S_ISLNK(st.st_mode)) {
 			int ret;
 			char buf[PATH_MAX + 1]; /* ought to be SYMLINK_MAX */
-			size_t sz = xsize_t(st.st_size);
-			if (sizeof(buf) <= st.st_size)
-				die("symlink too long: %s", name);
-			ret = readlink(name, buf, sz);
+			ret = readlink(name, buf, sizeof(buf));
 			if (ret < 0)
 				die("readlink(%s)", name);
-			prep_temp_blob(temp, buf, sz,
+			if (ret == sizeof(buf))
+				die("symlink too long: %s", name);
+			prep_temp_blob(temp, buf, ret,
 				       (one->sha1_valid ?
 					one->sha1 : null_sha1),
 				       (one->sha1_valid ?
-- 
1.6.1.rc3.3.gcc3e3

^ permalink raw reply related

* [PATCH 4/5] Make 'diff_populate_filespec()' use the new 'strbuf_readlink()'
From: Linus Torvalds @ 2008-12-17 18:44 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171043180.14014@localhost.localdomain>


From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 17 Dec 2008 10:26:13 -0800

This makes all tests pass on a system where 'lstat()' has been hacked to
return bogus data in st_size for symlinks.

Of course, the test coverage isn't complete, but it's a good baseline.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 diff.c |   16 +++++++---------
 1 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index afefe08..4b2029c 100644
--- a/diff.c
+++ b/diff.c
@@ -1773,19 +1773,17 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
 		s->size = xsize_t(st.st_size);
 		if (!s->size)
 			goto empty;
-		if (size_only)
-			return 0;
 		if (S_ISLNK(st.st_mode)) {
-			int ret;
-			s->data = xmalloc(s->size);
-			s->should_free = 1;
-			ret = readlink(s->path, s->data, s->size);
-			if (ret < 0) {
-				free(s->data);
+			struct strbuf sb = STRBUF_INIT;
+
+			if (strbuf_readlink(&sb, s->path, s->size))
 				goto err_empty;
-			}
+			s->data = strbuf_detach(&sb, &s->size);
+			s->should_free = 1;
 			return 0;
 		}
+		if (size_only)
+			return 0;
 		fd = open(s->path, O_RDONLY);
 		if (fd < 0)
 			goto err_empty;
-- 
1.6.1.rc3.3.gcc3e3

^ permalink raw reply related

* [PATCH 3/5] Make 'index_path()' use 'strbuf_readlink()'
From: Linus Torvalds @ 2008-12-17 18:43 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171042500.14014@localhost.localdomain>


From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 17 Dec 2008 09:51:53 -0800

This makes us able to properly index symlinks even on filesystems where
st_size doesn't match the true size of the link.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 sha1_file.c |   14 +++++---------
 1 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 0e021c5..52d1ead 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2523,8 +2523,7 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object)
 {
 	int fd;
-	char *target;
-	size_t len;
+	struct strbuf sb = STRBUF_INIT;
 
 	switch (st->st_mode & S_IFMT) {
 	case S_IFREG:
@@ -2537,20 +2536,17 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
 				     path);
 		break;
 	case S_IFLNK:
-		len = xsize_t(st->st_size);
-		target = xmalloc(len + 1);
-		if (readlink(path, target, len + 1) != st->st_size) {
+		if (strbuf_readlink(&sb, path, st->st_size)) {
 			char *errstr = strerror(errno);
-			free(target);
 			return error("readlink(\"%s\"): %s", path,
 			             errstr);
 		}
 		if (!write_object)
-			hash_sha1_file(target, len, blob_type, sha1);
-		else if (write_sha1_file(target, len, blob_type, sha1))
+			hash_sha1_file(sb.buf, sb.len, blob_type, sha1);
+		else if (write_sha1_file(sb.buf, sb.len, blob_type, sha1))
 			return error("%s: failed to insert into database",
 				     path);
-		free(target);
+		strbuf_release(&sb);
 		break;
 	case S_IFDIR:
 		return resolve_gitlink_ref(path, "HEAD", sha1);
-- 
1.6.1.rc3.3.gcc3e3

^ permalink raw reply related

* [PATCH 2/5] Make 'ce_compare_link()' use the new 'strbuf_readlink()'
From: Linus Torvalds @ 2008-12-17 18:43 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171042120.14014@localhost.localdomain>


From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 17 Dec 2008 09:47:27 -0800

This simplifies the code, and also makes ce_compare_link now able to
handle filesystems with odd 'st_size' return values for symlinks.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 read-cache.c |   22 ++++++++--------------
 1 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index c14b562..b1475ff 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -99,27 +99,21 @@ static int ce_compare_data(struct cache_entry *ce, struct stat *st)
 static int ce_compare_link(struct cache_entry *ce, size_t expected_size)
 {
 	int match = -1;
-	char *target;
 	void *buffer;
 	unsigned long size;
 	enum object_type type;
-	int len;
+	struct strbuf sb = STRBUF_INIT;
 
-	target = xmalloc(expected_size);
-	len = readlink(ce->name, target, expected_size);
-	if (len != expected_size) {
-		free(target);
+	if (strbuf_readlink(&sb, ce->name, expected_size))
 		return -1;
-	}
+
 	buffer = read_sha1_file(ce->sha1, &type, &size);
-	if (!buffer) {
-		free(target);
-		return -1;
+	if (buffer) {
+		if (size == sb.len)
+			match = memcmp(buffer, sb.buf, size);
+		free(buffer);
 	}
-	if (size == expected_size)
-		match = memcmp(buffer, target, size);
-	free(buffer);
-	free(target);
+	strbuf_release(&sb);
 	return match;
 }
 
-- 
1.6.1.rc3.3.gcc3e3

^ permalink raw reply related

* [PATCH 1/5] Add generic 'strbuf_readlink()' helper function
From: Linus Torvalds @ 2008-12-17 18:42 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List
In-Reply-To: <alpine.LFD.2.00.0812171034520.14014@localhost.localdomain>


From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 17 Dec 2008 09:36:40 -0800

It was already what 'git apply' did in read_old_data(), just export it
as a real function, and make it be more generic.

In particular, this handles the case of the lstat() st_size data not
matching the readlink() return value properly (which apparently happens
at least on NTFS under Linux).  But as a result of this you could also
use the new function without even knowing how big the link is going to
be, and it will allocate an appropriately sized buffer.

So we pass in the st_size of the link as just a hint, rather than a
fixed requirement.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 builtin-apply.c |    6 ++----
 strbuf.c        |   28 ++++++++++++++++++++++++++++
 strbuf.h        |    1 +
 3 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/builtin-apply.c b/builtin-apply.c
index 4c4d1e1..07244b0 100644
--- a/builtin-apply.c
+++ b/builtin-apply.c
@@ -1559,10 +1559,8 @@ static int read_old_data(struct stat *st, const char *path, struct strbuf *buf)
 {
 	switch (st->st_mode & S_IFMT) {
 	case S_IFLNK:
-		strbuf_grow(buf, st->st_size);
-		if (readlink(path, buf->buf, st->st_size) != st->st_size)
-			return -1;
-		strbuf_setlen(buf, st->st_size);
+		if (strbuf_readlink(buf, path, st->st_size) < 0)
+			return error("unable to read symlink %s", path);
 		return 0;
 	case S_IFREG:
 		if (strbuf_read_file(buf, path, st->st_size) != st->st_size)
diff --git a/strbuf.c b/strbuf.c
index 13be67e..904a2b0 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -288,6 +288,34 @@ ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint)
 	return sb->len - oldlen;
 }
 
+#define STRBUF_MAXLINK (2*PATH_MAX)
+
+int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint)
+{
+	if (hint < 32)
+		hint = 32;
+
+	while (hint < STRBUF_MAXLINK) {
+		int len;
+
+		strbuf_grow(sb, hint);
+		len = readlink(path, sb->buf, hint);
+		if (len < 0) {
+			if (errno != ERANGE)
+				break;
+		} else if (len < hint) {
+			strbuf_setlen(sb, len);
+			return 0;
+		}
+
+		/* .. the buffer was too small - try again */
+		hint *= 2;
+		continue;
+	}
+	strbuf_release(sb);
+	return -1;
+}
+
 int strbuf_getline(struct strbuf *sb, FILE *fp, int term)
 {
 	int ch;
diff --git a/strbuf.h b/strbuf.h
index b1670d9..89bd36e 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -124,6 +124,7 @@ extern size_t strbuf_fread(struct strbuf *, size_t, FILE *);
 /* XXX: if read fails, any partial read is undone */
 extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint);
 extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint);
+extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint);
 
 extern int strbuf_getline(struct strbuf *, FILE *, int);
 
-- 
1.6.1.rc3.3.gcc3e3

^ permalink raw reply related

* [PATCH 0/5] Be careful about lstat()-vs-readlink()
From: Linus Torvalds @ 2008-12-17 18:42 UTC (permalink / raw)
  To: Junio C Hamano, Git Mailing List



This series of five patches makes us a lot more careful about readlink(), 
and in particular it avoids the code that depends on doing an lstat(), and 
then using st_size as the length of the link. That's what POSIX says 
_should_ happen, but Ramon Tayag reported git failing on NTFS under Linux 
because st_size doesn't match readlink() return value.

It doesn't seem to be unknown elsewhere either, since coreutils (through 
gnulib's areadlink_with_size) also has a big compatibility layer around 
readlink().

This series has been 'tested' by running it with the appended totally 
hacky patch that fakes out the lstat() st_size values for symlinks, so it 
has actually had some testing. 

The complete series does:

Linus Torvalds (5):
  Add generic 'strbuf_readlink()' helper function
  Make 'ce_compare_link()' use the new 'strbuf_readlink()'
  Make 'index_path()' use 'strbuf_readlink()'
  Make 'diff_populate_filespec()' use the new 'strbuf_readlink()'
  Make 'prepare_temp_file()' ignore st_size for symlinks

 builtin-apply.c |    6 ++----
 diff.c          |   25 +++++++++++--------------
 read-cache.c    |   22 ++++++++--------------
 sha1_file.c     |   14 +++++---------
 strbuf.c        |   28 ++++++++++++++++++++++++++++
 strbuf.h        |    1 +
 6 files changed, 55 insertions(+), 41 deletions(-)

and the hacky test-patch (which is obviously _not_ meant to be applied) is 
as follows...

		Linus

---
diff --git a/cache.h b/cache.h
index 231c06d..2d85fca 100644
--- a/cache.h
+++ b/cache.h
@@ -943,4 +943,7 @@ void overlay_tree_on_cache(const char *tree_name, const char *prefix);
 char *alias_lookup(const char *alias);
 int split_cmdline(char *cmdline, const char ***argv);
 
+extern int gitlstat(const char *, struct stat *);
+#define lstat gitlstat
+
 #endif /* CACHE_H */
diff --git a/read-cache.c b/read-cache.c
index b1475ff..defbb20 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -15,6 +15,16 @@
 #include "revision.h"
 #include "blob.h"
 
+#undef lstat
+int gitlstat(const char *path, struct stat *buf)
+{
+	int retval = lstat(path, buf);
+	if (!retval && S_ISLNK(buf->st_mode))
+		buf->st_size = 1;
+	return retval;
+}
+#define lstat gitlstat
+
 /* Index extensions.
  *
  * The first letter should be 'A'..'Z' for extensions that are not

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox