Git development

Git development
 help / color / mirror / Atom feed

* [RFC PATCH 2/2] Make it possible to apply a range of changes at once
From: Jeff Epler @ 2009-10-21 21:20 UTC (permalink / raw)
  To: git; +Cc: Jeff Epler
In-Reply-To: <1256160023-29629-1-git-send-email-jepler@unpythonic.net>

---
The diff looks bigger than it is because it changed the indentation
level of about 80 lines, and that made it necessary to reflow a lengthy
comment block as well.

 git-gui.sh   |   15 +++-
 lib/diff.tcl |  225 ++++++++++++++++++++++++++++++++--------------------------
 2 files changed, 135 insertions(+), 105 deletions(-)

diff --git a/git-gui.sh b/git-gui.sh
index 09b2720..c69d904 100755
--- a/git-gui.sh
+++ b/git-gui.sh
@@ -3194,7 +3194,7 @@ set ui_diff_applyhunk [$ctxm index last]
 lappend diff_actions [list $ctxm entryconf $ui_diff_applyhunk -state]
 $ctxm add command \
 	-label [mc "Apply/Reverse Line"] \
-	-command {apply_line $cursorX $cursorY; do_rescan}
+	-command {apply_range_or_line $cursorX $cursorY; do_rescan}
 set ui_diff_applyline [$ctxm index last]
 lappend diff_actions [list $ctxm entryconf $ui_diff_applyline -state]
 $ctxm add separator
@@ -3234,12 +3234,21 @@ proc popup_diff_menu {ctxm ctxmmg x y X Y} {
 	if {[string first {U} $state] >= 0} {
 		tk_popup $ctxmmg $X $Y
 	} else {
+		set has_range [expr {[$::ui_diff tag nextrange sel 0.0] != {}}]
 		if {$::ui_index eq $::current_diff_side} {
 			set l [mc "Unstage Hunk From Commit"]
-			set t [mc "Unstage Line From Commit"]
+			if {$has_range} {
+				set t [mc "Unstage Lines From Commit"]
+			} else {
+				set t [mc "Unstage Line From Commit"]
+			}
 		} else {
 			set l [mc "Stage Hunk For Commit"]
-			set t [mc "Stage Line For Commit"]
+			if {$has_range} {
+				set t [mc "Stage Lines For Commit"]
+			} else {
+				set t [mc "Stage Line For Commit"]
+			}
 		}
 		if {$::is_3way_diff || $::is_submodule_diff
 			|| $current_diff_path eq {}
diff --git a/lib/diff.tcl b/lib/diff.tcl
index 066755b..0fe3ec6 100644
--- a/lib/diff.tcl
+++ b/lib/diff.tcl
@@ -533,10 +533,23 @@ proc apply_hunk {x y} {
 	}
 }
 
-proc apply_line {x y} {
+proc apply_range_or_line {x y} {
 	global current_diff_path current_diff_header current_diff_side
 	global ui_diff ui_index file_states
 
+	set selected [$ui_diff tag nextrange sel 0.0]
+
+	if {$selected == {}} {
+		set first [$ui_diff index "@$x,$y"]
+		set last $first
+	} else {
+		set first [lindex $selected 0]
+		set last [lindex $selected 1]
+	}
+
+	set first_l [$ui_diff index "$first linestart"]
+	set last_l [$ui_diff index "$last lineend"]
+
 	if {$current_diff_path eq {} || $current_diff_header eq {}} return
 	if {![lock_index apply_hunk]} return
 
@@ -559,120 +572,128 @@ proc apply_line {x y} {
 		}
 	}
 
-	set the_l [$ui_diff index @$x,$y]
+	set wholepatch {}
 
-	# operate only on change lines
-	set c1 [$ui_diff get "$the_l linestart"]
-	if {$c1 ne {+} && $c1 ne {-}} {
-		unlock_index
-		return
-	}
-	set sign $c1
-
-	set i_l [$ui_diff search -backwards -regexp ^@@ $the_l 0.0]
-	if {$i_l eq {}} {
-		unlock_index
-		return
-	}
-	# $i_l is now at the beginning of a line
+	while {$first_l < $last_l} {
+		set i_l [$ui_diff search -backwards -regexp ^@@ $first_l 0.0]
+		if {$i_l eq {}} {
+			# If there's not a @@ above, then the selected range
+			# must have come before the first_l @@
+			set i_l [$ui_diff search -regexp ^@@ $first_l $last_l]
+		}
+		if {$i_l eq {}} {
+			unlock_index
+			return
+		}
+		# $i_l is now at the beginning of a line
 
-	# pick start line number from hunk header
-	set hh [$ui_diff get $i_l "$i_l + 1 lines"]
-	set hh [lindex [split $hh ,] 0]
-	set hln [lindex [split $hh -] 1]
+		# pick start line number from hunk header
+		set hh [$ui_diff get $i_l "$i_l + 1 lines"]
+		set hh [lindex [split $hh ,] 0]
+		set hln [lindex [split $hh -] 1]
 
-	# There is a special situation to take care of. Consider this hunk:
-	#
-	#    @@ -10,4 +10,4 @@
-	#     context before
-	#    -old 1
-	#    -old 2
-	#    +new 1
-	#    +new 2
-	#     context after
-	#
-	# We used to keep the context lines in the order they appear in the
-	# hunk. But then it is not possible to correctly stage only
-	# "-old 1" and "+new 1" - it would result in this staged text:
-	#
-	#    context before
-	#    old 2
-	#    new 1
-	#    context after
-	#
-	# (By symmetry it is not possible to *un*stage "old 2" and "new 2".)
-	#
-	# We resolve the problem by introducing an asymmetry, namely, when
-	# a "+" line is *staged*, it is moved in front of the context lines
-	# that are generated from the "-" lines that are immediately before
-	# the "+" block. That is, we construct this patch:
-	#
-	#    @@ -10,4 +10,5 @@
-	#     context before
-	#    +new 1
-	#     old 1
-	#     old 2
-	#     context after
-	#
-	# But we do *not* treat "-" lines that are *un*staged in a special
-	# way.
-	#
-	# With this asymmetry it is possible to stage the change
-	# "old 1" -> "new 1" directly, and to stage the change
-	# "old 2" -> "new 2" by first staging the entire hunk and
-	# then unstaging the change "old 1" -> "new 1".
-
-	# This is non-empty if and only if we are _staging_ changes;
-	# then it accumulates the consecutive "-" lines (after converting
-	# them to context lines) in order to be moved after the "+" change
-	# line.
-	set pre_context {}
-
-	set n 0
-	set i_l [$ui_diff index "$i_l + 1 lines"]
-	set patch {}
-	while {[$ui_diff compare $i_l < "end - 1 chars"] &&
-	       [$ui_diff get $i_l "$i_l + 2 chars"] ne {@@}} {
-		set next_l [$ui_diff index "$i_l + 1 lines"]
-		set c1 [$ui_diff get $i_l]
-		if {[$ui_diff compare $i_l <= $the_l] &&
-		    [$ui_diff compare $the_l < $next_l]} {
-			# the line to stage/unstage
-			set ln [$ui_diff get $i_l $next_l]
-			if {$c1 eq {-}} {
-				set n [expr $n+1]
+		# There is a special situation to take care of. Consider this
+		# hunk:
+		#
+		#    @@ -10,4 +10,4 @@
+		#     context before
+		#    -old 1
+		#    -old 2
+		#    +new 1
+		#    +new 2
+		#     context after
+		#
+		# We used to keep the context lines in the order they appear in
+		# the hunk. But then it is not possible to correctly stage only
+		# "-old 1" and "+new 1" - it would result in this staged text:
+		#
+		#    context before
+		#    old 2
+		#    new 1
+		#    context after
+		#
+		# (By symmetry it is not possible to *un*stage "old 2" and "new
+		# 2".)
+		#
+		# We resolve the problem by introducing an asymmetry, namely,
+		# when a "+" line is *staged*, it is moved in front of the
+		# context lines that are generated from the "-" lines that are
+		# immediately before the "+" block. That is, we construct this
+		# patch:
+		#
+		#    @@ -10,4 +10,5 @@
+		#     context before
+		#    +new 1
+		#     old 1
+		#     old 2
+		#     context after
+		#
+		# But we do *not* treat "-" lines that are *un*staged in a
+		# special way.
+		#
+		# With this asymmetry it is possible to stage the change "old
+		# 1" -> "new 1" directly, and to stage the change "old 2" ->
+		# "new 2" by first staging the entire hunk and then unstaging
+		# the change "old 1" -> "new 1".
+
+		# This is non-empty if and only if we are _staging_ changes;
+		# then it accumulates the consecutive "-" lines (after
+		# converting them to context lines) in order to be moved after
+		# the "+" change line.
+		set pre_context {}
+
+		set n 0
+		set m 0
+		set i_l [$ui_diff index "$i_l + 1 lines"]
+		set patch {}
+		while {[$ui_diff compare $i_l < "end - 1 chars"] &&
+		       [$ui_diff get $i_l "$i_l + 2 chars"] ne {@@}} {
+			set next_l [$ui_diff index "$i_l + 1 lines"]
+			set c1 [$ui_diff get $i_l]
+			if {[$ui_diff compare $first_l <= $i_l] &&
+			    [$ui_diff compare $i_l < $last_l] &&
+			    ($c1 eq {-} || $c1 eq {+})} {
+				# a line to stage/unstage
+				set ln [$ui_diff get $i_l $next_l]
+				if {$c1 eq {-}} {
+					set n [expr $n+1]
+					set patch "$patch$pre_context$ln"
+				} else {
+					set m [expr $m+1]
+					set patch "$patch$ln$pre_context"
+				}
+				set pre_context {}
+			} elseif {$c1 ne {-} && $c1 ne {+}} {
+				# context line
+				set ln [$ui_diff get $i_l $next_l]
 				set patch "$patch$pre_context$ln"
-			} else {
-				set patch "$patch$ln$pre_context"
-			}
-			set pre_context {}
-		} elseif {$c1 ne {-} && $c1 ne {+}} {
-			# context line
-			set ln [$ui_diff get $i_l $next_l]
-			set patch "$patch$pre_context$ln"
-			set n [expr $n+1]
-			set pre_context {}
-		} elseif {$c1 eq $to_context} {
-			# turn change line into context line
-			set ln [$ui_diff get "$i_l + 1 chars" $next_l]
-			if {$c1 eq {-}} {
-				set pre_context "$pre_context $ln"
-			} else {
-				set patch "$patch $ln"
+				set n [expr $n+1]
+				set m [expr $m+1]
+				set pre_context {}
+			} elseif {$c1 eq $to_context} {
+				# turn change line into context line
+				set ln [$ui_diff get "$i_l + 1 chars" $next_l]
+				if {$c1 eq {-}} {
+					set pre_context "$pre_context $ln"
+				} else {
+					set patch "$patch $ln"
+				}
+				set n [expr $n+1]
+				set m [expr $m+1]
 			}
-			set n [expr $n+1]
+			set i_l $next_l
 		}
-		set i_l $next_l
+		set patch "$patch$pre_context"
+		set wholepatch "$wholepatch@@ -$hln,$n +$hln,$m @@\n$patch"
+		set first_l [$ui_diff index "$next_l + 1 lines"]
 	}
-	set patch "$patch$pre_context"
-	set patch "@@ -$hln,$n +$hln,[eval expr $n $sign 1] @@\n$patch"
 
 	if {[catch {
 		set enc [get_path_encoding $current_diff_path]
 		set p [eval git_write $apply_cmd]
 		fconfigure $p -translation binary -encoding $enc
 		puts -nonewline $p $current_diff_header
-		puts -nonewline $p $patch
+		puts -nonewline $p $wholepatch
 		close $p} err]} {
 		error_popup [append $failed_msg "\n\n$err"]
 	}
-- 
1.6.5.rc1.49.ge970

^ permalink raw reply related

* [RFC PATCH v2 0/2] git-gui: (un)stage a range of changes at once
From: Jeff Epler @ 2009-10-21 21:20 UTC (permalink / raw)
  To: git; +Cc: Jeff Epler

Compared to the first version, I fixed a bug concerning staging line(s)
when all following lines are deletions (a preexisting bug in git-gui).
This version is based on the master branch of git-gui.git, rather
than the master branch of git.git.

The first change fixes a long-standing git-gui bug in an area that the
new feature is rewriting anyway.  If there's interest in the new feature
then maybe the two should just be squashed (using the message from the
second).  If not, it'd be nice to see the bugfix applied anyway.

Jeff Epler (2):
  Fix applying a line when all following lines are deletions
  Make it possible to apply a range of changes at once

 git-gui.sh   |   15 +++-
 lib/diff.tcl |  224 ++++++++++++++++++++++++++++++++--------------------------
 2 files changed, 135 insertions(+), 104 deletions(-)

^ permalink raw reply

* [RFC PATCH 1/2] Fix applying a line when all following lines are deletions
From: Jeff Epler @ 2009-10-21 21:20 UTC (permalink / raw)
  To: git; +Cc: Jeff Epler
In-Reply-To: <1256160023-29629-1-git-send-email-jepler@unpythonic.net>

If a diff looked like
 @@
  context
 -del1
 -del2
and you wanted to stage the deletion 'del1', the generated patch
wouldn't apply because it was missing the line 'del2' converted to
context, but this line was counted in the @@-line
---
 lib/diff.tcl |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/lib/diff.tcl b/lib/diff.tcl
index bd5d189..066755b 100644
--- a/lib/diff.tcl
+++ b/lib/diff.tcl
@@ -664,6 +664,7 @@ proc apply_line {x y} {
 		}
 		set i_l $next_l
 	}
+	set patch "$patch$pre_context"
 	set patch "@@ -$hln,$n +$hln,[eval expr $n $sign 1] @@\n$patch"
 
 	if {[catch {
-- 
1.6.5.rc1.49.ge970

^ permalink raw reply related

* Re: [Foundation-l] Wikipedia meets git
From: Nicolas Pitre @ 2009-10-21 21:05 UTC (permalink / raw)
  To: Bernie Innocenti; +Cc: Samuel Klein, Wikimedia Foundation Mailing List, git
In-Reply-To: <1256154567.1477.87.camel@giskard>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1818 bytes --]

On Wed, 21 Oct 2009, Bernie Innocenti wrote:

> And here's the catch: the history of individual files is not
> directly represented in a git repository. It is typically scattered
> across thousands of commit objects, with no direct links to help find
> them. If you want to retrieve the log of a file that was changed only 6
> times in the entire history of the Linux kernel, you'd have to dig
> through *all* of the 170K revisions in the "master" branch.
> 
> And it takes some time even if git is blazingly fast:
> 
>  bernie@giskard:~/src/kernel/linux-2.6$ time git log  --pretty=oneline REPORTING-BUGS  | wc -l 
>  6
> 
>  real	0m1.668s
>  user	0m1.416s
>  sys	0m0.210s
> 
> (my laptop has a low-power CPU. A fast server would be 8-10x faster).
> 
> 
> Now, the English Wikipedia seems to have slightly more than 3M articles,
> with--how many? tens of millions of revisions for sure. Going through
> them *every time* one needs to consult the history of a file would be
> 100x slower. Tens of seconds. Not acceptable, uh?
> 
> It seems to me that the typical usage pattern of an encyclopedia is to
> change each article individually. Perhaps I'm underestimating the role
> of bots here. Anyway, there's no consistency *requirement* for mass
> changes to be applied atomically throughout all the encyclopedia, right?

You certainly don't need to put all files in the same tree then.  
Having the whole thing split according to some sections that are 
unlikely to overlap would be the way to go.  Therefore you could arrange 
subsections to have their own branches with no other files in them, or 
even rely on Git submodules.  The partitioning doesn't necessarily have 
to be one of the two extremes such as one branch per file à la CVS or 
all files in the same branch/tree as Git does by default.


Nicolas

^ permalink raw reply

* Re: [PATCH 3/3] git checkout --nodwim
From: Nanako Shiraishi @ 2009-10-21 21:21 UTC (permalink / raw)
  To: Avery Pennarun
  Cc: Junio C Hamano, Alex Riesen, git, Johannes Schindelin,
	Jay Soffian
In-Reply-To: <32541b130910211029x2f4295c3w40dd13b3cdc7762c@mail.gmail.com>

Quoting Avery Pennarun <apenwarr@gmail.com>

> On Sun, Oct 18, 2009 at 3:53 PM, Junio C Hamano <gitster@pobox.com> wrote:
>> Helping hands in polishing it up is very welcome.
>
> I find the idea of an option for "don't do what I mean" to be pretty
> entertaining.  Or maybe just misleading :)
>
> Have fun,
>
> Avery

As Junio asked for helping hands, let's try to be helpful and constructive.

Maybe "don't second-guess" explains it better?

-- 
Nanako Shiraishi
http://ivory.ap.teacup.com/nanako3/

^ permalink raw reply

* Re: [PATCH] git push: say that --tag can't be used with --all or --mirror in help text
From: Nanako Shiraishi @ 2009-10-21 21:21 UTC (permalink / raw)
  To: Jeff King
  Cc: Junio C Hamano, Miklos Vajna, Sebastian Pipping, git,
	Bjorn Gustavsson
In-Reply-To: <20091021144203.GA30485@coredump.intra.peff.net>

Quoting Jeff King <peff@peff.net>

> On Mon, Oct 19, 2009 at 12:57:01PM +0900, Nanako Shiraishi wrote:
>
>> -		OPT_BOOLEAN( 0 , "tags", &tags, "push tags"),
>> +		OPT_BOOLEAN( 0 , "tags", &tags, "push tags (can't be used with --all nor --mirror"),
>
> Hmm. We apparently all managed to miss this typo. It's visually hard to
> notice because of the ")" closing the macro.

How embarrassing. Thank you for a fix.

-- 
Nanako Shiraishi
http://ivory.ap.teacup.com/nanako3/

^ permalink raw reply

* Re: keeping track of where a patch begins
From: Nicolas Pitre @ 2009-10-21 20:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: E R, git, Jeff King
In-Reply-To: <7veiow4iqc.fsf@alter.siamese.dyndns.org>

On Wed, 21 Oct 2009, Junio C Hamano wrote:

> Nicolas Pitre <nico@fluxnic.net> writes:
> 
> > On Wed, 21 Oct 2009, E R wrote:
> >
> > >> What solutions have you come up with to either catch or prevent
> > >> this from happening? Is it possible to determine what node a branch
> >> started from?
> >
> > This can be determined by looking at the gitk output.
> >
> > Also 'git merge-base' can give you that node, given the main branch and 
> > the topic branch.  See documentation about git-merge-base.
> >
> > Then if you need to move a branch to another starting node, then 'git 
> > rebase' is what you need (again the git-rebase documentation is pretty 
> > detailed).
> 
> That is a correct way to diagnose the mistake and recover from it, but
> unfortunately it is a rather weak tool to identify the mistake in the
> first place.

Well... The "mistake" is probably going to be different depending on the 
work flow used.  I don't think there is a generic definition of such 
mistakes.

In this case, simply having

	if [ $(git merge-base $expected_branch_point $branch) != \
	     $(git rev-parse $expected_branch_point) ]; then
		(complain/refuse the merge of $branch)
	fi

should be quite sufficient for enforcing a proper branch policy.  Of 
course the $expected_branch_point is something that is determined 
outside of Git.

> A branch in git, as Randal often used to say on #git, is an illusion---it
> points only at the top and does not identify the bottom.
> 
> But it does _not_ have to stay that way at the Porcelain level.
> 
> Here is a rough sketch of one possible solution.  It is not fully thought
> out; the basic idea is probably sound but I did not try to exhaustively
> cover changes to various tools that are necessary to maintain the
> invariants this scheme requires.

I never came across a situation where such an elaborate scheme was 
needed to actually record and maintain that information, or could be 
really useful.  And some branches might be built on top of a sub-branch 
already, making the real branch's bottom the sub-branch's instead in a 
given context.  It all depends on the work flow and the convention used 
for a project.  And the tool has no way to figure that out (is this the 
real branch bottom or should it be one or more level down?), etc.

> We probably could kill the other bird in the nearby thread that wants to
> add a description to a branch, if this scheme is fully implemented

Well, I think we gain in flexibility by keeping those things separate 
though.  Blending data structures together is not always a good thing.

We have reflog data separate from the refs themselves, so I think that 
having .git/desc/refs/* containing simple text files would be good 
enough and simple to implement/use.

Nicolas

^ permalink raw reply

* Re: [PATCH] Documentation/fetch-options.txt: order options alphabetically
From: Junio C Hamano @ 2009-10-21 20:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jari Aalto, git
In-Reply-To: <7vpr8g32ht.fsf@alter.siamese.dyndns.org>

Junio C Hamano <gitster@pobox.com> writes:

> Jari Aalto <jari.aalto@cante.net> writes:
>
>> Signed-off-by: Jari Aalto <jari.aalto@cante.net>
>> ---
>>  Documentation/fetch-options.txt |   48 +++++++++++++++++++-------------------
>>  1 files changed, 24 insertions(+), 24 deletions(-)
>
> Does this even make sense when git-pull.txt itself includes files other
> than this one?  fetch-options.txt may begin with the description of -a
> that is very early in the alphabetical sequence, but git-pull.txt includes
> merge-options.txt before this file, whose contents would describe what
> probably would come after -a (I didn't check).  Also git-pull.txt itself
> has description of --rebase and --no-rebase that definitely come before -a
> (I did check).

Ah, in your defense ;-) I think you looked only at git-fetch.txt without
checking where else this file is included.  Then the patch certainly is
understandable.  It would probably make git-fetch.{1,html} easier to scan,
while making things not worse for git-pull.{1,html}

^ permalink raw reply

* Re: [PATCH] Documentation/fetch-options.txt: order options alphabetically
From: Junio C Hamano @ 2009-10-21 20:39 UTC (permalink / raw)
  To: Jari Aalto; +Cc: git
In-Reply-To: <87eiow1pey.fsf@jondo.cante.net>

Jari Aalto <jari.aalto@cante.net> writes:

> Signed-off-by: Jari Aalto <jari.aalto@cante.net>
> ---
>  Documentation/fetch-options.txt |   48 +++++++++++++++++++-------------------
>  1 files changed, 24 insertions(+), 24 deletions(-)

Does this even make sense when git-pull.txt itself includes files other
than this one?  fetch-options.txt may begin with the description of -a
that is very early in the alphabetical sequence, but git-pull.txt includes
merge-options.txt before this file, whose contents would describe what
probably would come after -a (I didn't check).  Also git-pull.txt itself
has description of --rebase and --no-rebase that definitely come before -a
(I did check).

^ permalink raw reply

* Re: [Foundation-l] Wikipedia meets git
From: Avery Pennarun @ 2009-10-21 20:31 UTC (permalink / raw)
  To: Bernie Innocenti; +Cc: Samuel Klein, Wikimedia Foundation Mailing List, git
In-Reply-To: <1256154567.1477.87.camel@giskard>

On Wed, Oct 21, 2009 at 3:49 PM, Bernie Innocenti <bernie@codewiz.org> wrote:
> And here's the catch: the history of individual files is not
> directly represented in a git repository. It is typically scattered
> across thousands of commit objects, with no direct links to help find
> them. If you want to retrieve the log of a file that was changed only 6
> times in the entire history of the Linux kernel, you'd have to dig
> through *all* of the 170K revisions in the "master" branch.
>
> And it takes some time even if git is blazingly fast:
>
>  bernie@giskard:~/src/kernel/linux-2.6$ time git log  --pretty=oneline REPORTING-BUGS  | wc -l
>  6
>
>  real   0m1.668s
>  user   0m1.416s
>  sys    0m0.210s
>
> (my laptop has a low-power CPU. A fast server would be 8-10x faster).
>
>
> Now, the English Wikipedia seems to have slightly more than 3M articles,
> with--how many? tens of millions of revisions for sure. Going through
> them *every time* one needs to consult the history of a file would be
> 100x slower. Tens of seconds. Not acceptable, uh?

I think this slowness could be overcome using a simple cache of
filename -> commitid list, right?

That is, you run some variant of "git log --name-only" and, for each
file changed by each commit, add an element to the commit list for
that file.  When committing in the future, use a hook that updates the
cache.  When you want to view the history of a particular file, simply
retrieve exactly the list of commits in that file's commitlist, not
other commits.

It sounds like such a cache could be implemented quite easily outside
of git itself.

Would that help?

That said, I'll bet you find other performance glitches when you
import millions of files and tens/hundreds of millions of commits.
But we probably won't know what those problems are until someone
imports them :)

Have fun,

Avery

^ permalink raw reply

* Re: [RFC] What to you think about a loose status for submodules?
From: Junio C Hamano @ 2009-10-21 20:23 UTC (permalink / raw)
  To: Heiko Voigt; +Cc: git, Jens Lehmann, Lars Hjemli
In-Reply-To: <20091021160122.GA2067@book.hvoigt.net>

Heiko Voigt <hvoigt@hvoigt.net> writes:

> For such a workflow I would like to implement what I call 'loose'
> submodules. Where a
>
>   git clone project.git
>   cd project
>   git submodule init && git submodule update
>
> would omit the 'help' folder. But in case I specify it directly like

I thought a blanket "submodule init/update" wasn't even a recommended
practice for this exact reason.  We tried to keep the default not to
gratuitously populate and checkout all submodule repositories, but
probably what you are trying to do was made more difficult by mistake
because people who wanted the other behaviour pushed too hard?

Defaulting to "do not populate and checkout unless explicitly asked"
sounds like the right thing to do, and if we broke it, it should be
corrected, I think.  Shouldn't it be a simple matter of teaching "--all"
option to "submodule init" (and "update") to let them blindly affect all
submodules, while making the default not to do anything?

>   git submodule init help
>
> it would update to the recorded revision.
>
> Of course the relation would be configurable. E.g.:
>
>   git config submodule."name".relation loose
>
> and the opposite as
>
>   git config submodule."name".relation tight

I do not think this should be a project-wide configuration that is
recorded in .gitmodules; if you are "help documentation" participant to
the project you would want "help" submodule, and other people will want
different submodules.

It would probably make more sense to introduce the notion of "module
groups", similar to the way "remote update <group>" can name a group of
remotes to affect.  Then documentation people can say

    submodule init doc && submodule update

if .gitmodules file records the mapping from "doc" to one or more
submodules (e.g. "help" and "doc").  If we are going to take this route,
it would still make sense to teach "--all" to "submodule init" and perhaps
default to init the "default" group if one exists, instead of making the
parameterless "init" a no-op as I suggested earlier.

But it is quite a long time since I looked at git-submodule.sh so please
take the above with a healthy dose of salt.

^ permalink raw reply

* Re: [Foundation-l] Wikipedia meets git
From: jamesmikedupont @ 2009-10-21 20:08 UTC (permalink / raw)
  To: Bernie Innocenti; +Cc: Samuel Klein, Wikimedia Foundation Mailing List, git
In-Reply-To: <1256154567.1477.87.camel@giskard>

Wow,
I am impressed.
Let me remind you of one thing,
most people are working on very small subsets of the data. Very few
people will want to have all the data, think about getting all the
versions from all the git repos, it would be the same.
My idea is for smaller chapters who want to get started easily, or
towns, regions to host their own branches of relevant data.
Given a world full of such servers, the sum would be great but the
individual branches needed at one time would be small.

mike

On Wed, Oct 21, 2009 at 9:49 PM, Bernie Innocenti <bernie@codewiz.org> wrote:
> [cc+=git@vger.kernel.org]
>
> El Wed, 21-10-2009 a las 08:43 -0400, Samuel Klein escribió:
>> That sounds like a great idea.  I know a few other people who have
>> worked on git-based wikis and toyed with making them compatible with
>> mediawiki (copying bernie innocenti, one of the most eloquent :).
>
> Then I'll do my best to sound as eloquent as expected :)
>
> While I think git's internal structure is wonderfully simple and
> elegant, I'm a little worried about its scalability in the wiki usecase.
>
> The scenario for which git's repository format was designed is "patch
> oriented" revision control of a filesystem tree. The central object of a
> git tree is the "commit", which represents a set of changes on multiple
> files. I'll disregard all the juicy details on how the changes are
> actually packed together to save disk space, making git's repository
> format amazingly compact.
>
> Commits are linked to each other in order to represent the history. Git
> can efficiently represent a highly non-linear history with thousands of
> branches, each containing hundreds of thousands revisions. Branching and
> merging huge trees is so fast that one is left wondering if anything has
> happened at all.
>
> So far, so good. This commit-oriented design is great if you want to
> track the history of *the whole tree* at once, applying related changes to
> multiple files atomically. In Git, as well as most other version control
> systems, there's no such thing as a *file* revision! Git manages entire
> trees. Trees are assigned unique revision numbers (in fact, ugly sha-1
> hashes), and can optionally be tagged or branched at will.
>
> And here's the catch: the history of individual files is not
> directly represented in a git repository. It is typically scattered
> across thousands of commit objects, with no direct links to help find
> them. If you want to retrieve the log of a file that was changed only 6
> times in the entire history of the Linux kernel, you'd have to dig
> through *all* of the 170K revisions in the "master" branch.
>
> And it takes some time even if git is blazingly fast:
>
>  bernie@giskard:~/src/kernel/linux-2.6$ time git log  --pretty=oneline REPORTING-BUGS  | wc -l
>  6
>
>  real   0m1.668s
>  user   0m1.416s
>  sys    0m0.210s
>
> (my laptop has a low-power CPU. A fast server would be 8-10x faster).
>
>
> Now, the English Wikipedia seems to have slightly more than 3M articles,
> with--how many? tens of millions of revisions for sure. Going through
> them *every time* one needs to consult the history of a file would be
> 100x slower. Tens of seconds. Not acceptable, uh?
>
> It seems to me that the typical usage pattern of an encyclopedia is to
> change each article individually. Perhaps I'm underestimating the role
> of bots here. Anyway, there's no consistency *requirement* for mass
> changes to be applied atomically throughout all the encyclopedia, right?
>
> In conclusion, the "tree at a time" design is going to be a performance
> bottleneck for a large wiki, with no useful application. Unless of
> course the concept of changesets was exposed in the UI, which would be
> an interesting idea to explore.
>
> Mercurial (Hg) seems to have a better repository layout for the "one
> file at a time" access pattern... Unfortunately, it's also much slower
> than git for almost any other purpose, sometimes by an order of
> magnitude. I'm not even sure how well Hg would cope with a repository
> containing 3M files and some 30M revisions. The largest Hg tree I've
> dealt with is the "mozilla central" repo, which is already unbearably
> slow to work with.
>
> It would be interesting to compare notes with the other DSCM hackers,
> too.
>
> --
>   // Bernie Innocenti - http://codewiz.org/
>  \X/  Sugar Labs       - http://sugarlabs.org/
>
> --
> To unsubscribe from this list: send the line "unsubscribe git" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply

* [PATCH] Documentation/fetch-options.txt: order options alphabetically
From: Jari Aalto @ 2009-10-21 20:07 UTC (permalink / raw)
  To: git

Signed-off-by: Jari Aalto <jari.aalto@cante.net>
---
 Documentation/fetch-options.txt |   48 +++++++++++++++++++-------------------
 1 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 5eb2b0e..2886874 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -1,25 +1,13 @@
-ifndef::git-pull[]
--q::
---quiet::
-	Pass --quiet to git-fetch-pack and silence any other internally
-	used git commands.
-
--v::
---verbose::
-	Be verbose.
-endif::git-pull[]
-
 -a::
 --append::
 	Append ref names and object names of fetched refs to the
 	existing contents of `.git/FETCH_HEAD`.  Without this
 	option old data in `.git/FETCH_HEAD` will be overwritten.
 
---upload-pack <upload-pack>::
-	When given, and the repository to fetch from is handled
-	by 'git-fetch-pack', '--exec=<upload-pack>' is passed to
-	the command to specify non-default path for the command
-	run on the other end.
+--depth=<depth>::
+	Deepen the history of a 'shallow' repository created by
+	`git clone` with `--depth=<depth>` option (see linkgit:git-clone[1])
+	by the specified number of commits.
 
 -f::
 --force::
@@ -29,6 +17,10 @@ endif::git-pull[]
 	fetches is a descendant of `<lbranch>`.  This option
 	overrides that check.
 
+-k::
+--keep::
+	Keep downloaded pack.
+
 ifdef::git-pull[]
 --no-tags::
 endif::git-pull[]
@@ -49,10 +41,6 @@ endif::git-pull[]
 	flag lets all tags and their associated objects be
 	downloaded.
 
--k::
---keep::
-	Keep downloaded pack.
-
 -u::
 --update-head-ok::
 	By default 'git-fetch' refuses to update the head which
@@ -62,7 +50,19 @@ endif::git-pull[]
 	implementing your own Porcelain you are not supposed to
 	use it.
 
---depth=<depth>::
-	Deepen the history of a 'shallow' repository created by
-	`git clone` with `--depth=<depth>` option (see linkgit:git-clone[1])
-	by the specified number of commits.
+--upload-pack <upload-pack>::
+	When given, and the repository to fetch from is handled
+	by 'git-fetch-pack', '--exec=<upload-pack>' is passed to
+	the command to specify non-default path for the command
+	run on the other end.
+
+ifndef::git-pull[]
+-q::
+--quiet::
+	Pass --quiet to git-fetch-pack and silence any other internally
+	used git commands.
+
+-v::
+--verbose::
+	Be verbose.
+endif::git-pull[]
-- 
1.6.4.3

^ permalink raw reply related

* Re: keeping track of where a patch begins
From: Junio C Hamano @ 2009-10-21 20:03 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: E R, git, Jeff King
In-Reply-To: <alpine.LFD.2.00.0910211402490.21460@xanadu.home>

Nicolas Pitre <nico@fluxnic.net> writes:

> On Wed, 21 Oct 2009, E R wrote:
>
>> What solutions have you come up with to either to catch or prevent
>> this from happening? It is possible to determine what node a branch
>> started from?
>
> This can be determined by looking at the gitk output.
>
> Also 'git merge-base' can give you that node, given the main branch and 
> the topic branch.  See documentation about git-merge-base.
>
> Then if you need to move a branch to another starting node, then 'git 
> rebase' is what you need (again the git-rebase documentation is pretty 
> detailed).

That is a correct way to diagnose the mistake and recover from it, but
unfortunately it is a rather weak tool to identify the mistake in the
first place.

A branch in git, as Randal often used to say on #git, is an illusion---it
points only at the top and does not identify the bottom.

But it does _not_ have to stay that way at the Porcelain level.

Here is a rough sketch of one possible solution.  It is not fully thought
out; the basic idea is probably sound but I did not try to exhaustively
cover changes to various tools that are necessary to maintain the
invariants this scheme requires.

 (0) Define a way to identify the bottom of a branch.  One way to do this
     is by an extra ref (e.g. refs/branchpoints/frotz).  Then the range
     refs/branchpoints/frotz..refs/heads/frotz identifies the
     commits on the branch.  None of the additional restrictions below
     applies when the branch does not have such bottom defined (i.e.
     created by the current git without this extension).

 (1) At branch creation, the branchpoint is noted.  E.g.

     $ git branch frotz master~4

     would internally become

     $ git update-ref refs/heads/frotz master~4
     $ git update-ref refs/branchpoints/frotz master~4

     You would also need to cover "checkout -b".

 (2) You can grow the branch naturally with "commit", "am" and "merge".
     The bottom of the branch does not have to move with these operations.

 (3) Operations that alter histories, e.g. "commit --amend", "rebase",
     "reset", while on a branch that records its bottom need to be taught
     to pay attention to not break its bottom.  Paying attention needs to
     take different forms depending on the operation; some probably will
     forbid the operation while others would automatically adjust the
     bottom.

     Examples (not exhaustive):

 (3-a) "branch -f frotz $commit"

     This moves the tip of the branch.  Unless $commit is already some
     part of the existing frotz branch, we should probably forbid it for
     simplicity, when a bottom is defined for the branch.

     We could later loosen the rule so that $commit is only required to be
     a descendant of existing bottom of the branch to support a workflow
     like this:

     $ git checkout -b frotz master~4 ;# records branchpoint
     $ edit; git add; git commit; ... ;# builds history
     $ git checkout HEAD^             ;# go back somewhere on frotz
     $ edit; git add; git commit; ... ;# builds an alternate history
     $ git show-branch HEAD frotz     ;# check progress
     $ git diff frotz HEAD            ;# is this one better?
     $ git branch -f frotz            ;# I prefer this new one better

 (3-b) "reset $commit" (with or without --hard/--soft/--mixed)

     This is similar to (3-a) above; $commit has to be a descendant of
     existing bottom.

 (3-c) "commit --amend"

     $ git checkout -b frotz master~4 ;# records branchpoint
     $ git commit --amend             ;# rewrite the bottom???

     would probably be a mistake, as the end result would make the frotz
     branch forked from master~5 with the first commit on the branch a
     fix-up to what is already in the master branch.

     However, this is a valid way to work:

     $ git checkout -b frotz master~4 ;# records branchpoint
     $ edit; git add; git commit      ;# builds history
     $ git commit --amend             ;# fix the tip

     and it does not have to do anything to the bottom.

 (3-d) "rebase"

     $ git checkout -b frotz master~4 ;# records branchpoint
     $ edit; git add; git commit; ... ;# builds history
     $ git rebase --onto master       ;# transplants the branch

     would make the "onto" commit the new bottom.  Another interesting
     thing to note is that we do not have to compute which commits to
     transplant with merge-base with the "onto" commit, because we know
     the bottom commit of the branch.

 (4) Operations that browse histories, e.g. "log", "show-branch", while on
     a branch that records its bottom can be taught to pay attention to
     the bottom.  For example, it is conceivable that

     $ git log
     $ git log -- Documentation/

     without an explicit branch name that fell back to the default HEAD
     while on branch "frotz" might be better run with an implicit bottom
     ^refs/branchpoints/frotz.

We probably could kill the other bird in the nearby thread that wants to
add a description to a branch, if this scheme is fully implemented (no, I
am not going to start coding right away, as this message is just a sketch
of what we _could_ do).  As we will fully know in what operations we need
to update the branchpoint ref, we could make the refs/branchpoints/frotz
an annotated tag, and store the description for the branch in that tag.
Whenever we need to adjust the branchpoint, we update it while carrying
the branch description message over to the new tag object.

^ permalink raw reply

* [PATCH] everyday: fsck and gc are not everyday operations
From: Anders Kaseorg @ 2009-10-21 20:02 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

Back in 2005 when this document was written, it may have made sense to 
introduce ‘git fsck’ (then ‘git fsck-objects’) as the very first example 
command for new users of Git 0.99.9.  Now that Git has been stable for 
years and does not actually tend to eat your data, it makes significantly 
less sense.  In fact, it sends an entirely wrong message.

‘git gc’ is also unnecessary for the purposes of this document, especially 
with gc.auto enabled by default.

The only other commands in the “Basic Repository” section were ‘git init’ 
and ‘git clone’.  ‘clone’ is already listed in the “Participant” section, 
so move ‘init’ to the “Standalone” section and get rid of “Basic 
Repository” entirely.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
---
 Documentation/everyday.txt |   51 +++----------------------------------------
 1 files changed, 4 insertions(+), 47 deletions(-)

diff --git a/Documentation/everyday.txt b/Documentation/everyday.txt
index 9310b65..e0ba8cc 100644
--- a/Documentation/everyday.txt
+++ b/Documentation/everyday.txt
@@ -1,13 +1,8 @@
 Everyday GIT With 20 Commands Or So
 ===================================
 
-<<Basic Repository>> commands are needed by people who have a
-repository --- that is everybody, because every working tree of
-git is a repository.
-
-In addition, <<Individual Developer (Standalone)>> commands are
-essential for anybody who makes a commit, even for somebody who
-works alone.
+<<Individual Developer (Standalone)>> commands are essential for
+anybody who makes a commit, even for somebody who works alone.
 
 If you work with other people, you will need commands listed in
 the <<Individual Developer (Participant)>> section as well.
@@ -20,46 +15,6 @@ administrators who are responsible for the care and feeding
 of git repositories.
 
 
-Basic Repository[[Basic Repository]]
-------------------------------------
-
-Everybody uses these commands to maintain git repositories.
-
-  * linkgit:git-init[1] or linkgit:git-clone[1] to create a
-    new repository.
-
-  * linkgit:git-fsck[1] to check the repository for errors.
-
-  * linkgit:git-gc[1] to do common housekeeping tasks such as
-    repack and prune.
-
-Examples
-~~~~~~~~
-
-Check health and remove cruft.::
-+
-------------
-$ git fsck <1>
-$ git count-objects <2>
-$ git gc <3>
-------------
-+
-<1> running without `\--full` is usually cheap and assures the
-repository health reasonably well.
-<2> check how many loose objects there are and how much
-disk space is wasted by not repacking.
-<3> repacks the local repository and performs other housekeeping tasks.
-
-Repack a small project into single pack.::
-+
-------------
-$ git gc <1>
-------------
-+
-<1> pack all the objects reachable from the refs into one pack,
-then remove the other packs.
-
-
 Individual Developer (Standalone)[[Individual Developer (Standalone)]]
 ----------------------------------------------------------------------
 
@@ -67,6 +22,8 @@ A standalone individual developer does not exchange patches with
 other people, and works alone in a single repository, using the
 following commands.
 
+  * linkgit:git-init[1] to create a new repository.
+
   * linkgit:git-show-branch[1] to see where you are.
 
   * linkgit:git-log[1] to see what happened.
-- 
1.6.5.1

^ permalink raw reply related

* Re: [Foundation-l] Wikipedia meets git
From: Bernie Innocenti @ 2009-10-21 19:49 UTC (permalink / raw)
  To: Samuel Klein; +Cc: Wikimedia Foundation Mailing List, git
In-Reply-To: <5396c0d10910210543i4c0a3350je5bee4c6389a2292@mail.gmail.com>

[cc+=git@vger.kernel.org]

El Wed, 21-10-2009 a las 08:43 -0400, Samuel Klein escribió:
> That sounds like a great idea.  I know a few other people who have
> worked on git-based wikis and toyed with making them compatible with
> mediawiki (copying bernie innocenti, one of the most eloquent :).

Then I'll do my best to sound as eloquent as expected :)

While I think git's internal structure is wonderfully simple and
elegant, I'm a little worried about its scalability in the wiki usecase.

The scenario for which git's repository format was designed is "patch
oriented" revision control of a filesystem tree. The central object of a
git tree is the "commit", which represents a set of changes on multiple
files. I'll disregard all the juicy details on how the changes are
actually packed together to save disk space, making git's repository
format amazingly compact.

Commits are linked to each other in order to represent the history. Git
can efficiently represent a highly non-linear history with thousands of
branches, each containing hundreds of thousands of revisions. Branching and
merging huge trees is so fast that one is left wondering if anything has
happened at all.

So far, so good. This commit-oriented design is great if you want to
track the history of *the whole tree* at once, applying related changes to
multiple files atomically. In Git, as well as most other version control
systems, there's no such thing as a *file* revision! Git manages entire
trees. Trees are assigned unique revision numbers (in fact, ugly sha-1
hashes), and can optionally be tagged or branched at will.

And here's the catch: the history of individual files is not
directly represented in a git repository. It is typically scattered
across thousands of commit objects, with no direct links to help find
them. If you want to retrieve the log of a file that was changed only 6
times in the entire history of the Linux kernel, you'd have to dig
through *all* of the 170K revisions in the "master" branch.

And it takes some time even if git is blazingly fast:

 bernie@giskard:~/src/kernel/linux-2.6$ time git log  --pretty=oneline REPORTING-BUGS  | wc -l 
 6

 real	0m1.668s
 user	0m1.416s
 sys	0m0.210s

(my laptop has a low-power CPU. A fast server would be 8-10x faster).

Now, the English Wikipedia seems to have slightly more than 3M articles,
with--how many? tens of millions of revisions for sure. Going through
them *every time* one needs to consult the history of a file would be
100x slower. Tens of seconds. Not acceptable, uh?

It seems to me that the typical usage pattern of an encyclopedia is to
change each article individually. Perhaps I'm underestimating the role
of bots here. Anyway, there's no consistency *requirement* for mass
changes to be applied atomically throughout all the encyclopedia, right?

In conclusion, the "tree at a time" design is going to be a performance
bottleneck for a large wiki, with no useful application. Unless of
course the concept of changesets was exposed in the UI, which would be
an interesting idea to explore.

Mercurial (Hg) seems to have a better repository layout for the "one
file at a time" access pattern... Unfortunately, it's also much slower
than git for almost any other purpose, sometimes by an order of
magnitude. I'm not even sure how well Hg would cope with a repository
containing 3M files and some 30M revisions. The largest Hg tree I've
dealt with is the "mozilla central" repo, which is already unbearably
slow to work with.

It would be interesting to compare notes with the other DSCM hackers,
too.

-- 
   // Bernie Innocenti - http://codewiz.org/
 \X/  Sugar Labs       - http://sugarlabs.org/

^ permalink raw reply

* Re: git submodules
From: Avery Pennarun @ 2009-10-21 19:38 UTC (permalink / raw)
  To: Steven Noonan; +Cc: Git Mailing List, crawl-ref-discuss
In-Reply-To: <f488382f0910171015j1a6d4d9fg690867154334c514@mail.gmail.com>

On Sat, Oct 17, 2009 at 1:15 PM, Steven Noonan <steven@uplinklabs.net> wrote:
> We're using git submodules for the contributing libraries. When I
> commit changes to those contribs, it correctly shows in the parent
> repository that those folders have different revisions than what's
> currently committed. However, if someone pulls those changes, it
> doesn't automatically update the contribs to match the committed
> version. But doing a pull or merge _should_ update the working tree to
> match the committed versions. It does with file data, so why not
> update the submodules? Especially if the submodule revision matched
> the committed version -before- the pull. Why are we forced into using
> 'git submodule update'?

<advertisement>
git-subtree (http://github.com/apenwarr/git-subtree) is an alternative
to submodules that doesn't have this problem.
</advertisement>

But it probably has other problems. :)  Works great for my purposes,
though, and quite a few people have contacted me to say they're using
it happily.

Have fun,

Avery

^ permalink raw reply

* [PATCH v2] Quote ' as \(aq in manpages
From: Thomas Rast @ 2009-10-21 18:57 UTC (permalink / raw)
  To: git; +Cc: Anders Kaseorg, Miklos Vajna, Junio C Hamano, bill lam
In-Reply-To: <alpine.DEB.2.00.0910211357160.5105@dr-wily.mit.edu>

The docbook/xmlto toolchain insists on quoting ' as \'.  This does
achieve the quoting goal, but modern 'man' implementations turn the
apostrophe into a unicode "proper" apostrophe (given the right
circumstances), breaking code examples in many of our manpages.

Quote them as \(aq instead, which is an "apostrophe quote" as per the
groff_char manpage.

Unfortunately, as Anders Kaseorg kindly pointed out, this is not
portable beyond groff, so we add an extra Makefile variable GNU_ROFF
which you need to enable to get the new quoting.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---

[Reinstated the Cc list, which I accidentally dropped when sending the
first patch...]

Anders Kaseorg wrote:
> \(aq is not portable to non-GNU roff.  See
>   http://bugs.debian.org/507673#65
>   http://sourceforge.net/tracker/index.php?func=detail&aid=2412738&group_id=21935&atid=373747
> for a proposed portable solution.

Thanks for pointing that out.  Makes things a lot easier though.  I'm
really beginning to enjoy the whole doc toolchain.

I could not find a way to insert the proposed definitions into the
header by tweaking the xsls, so unless someone comes up with a way of
doing that, this is the best I can do.

To save you the effort of clicking the links, the header definitions
would be

.ie \n(.g .ds Aq \(aq
.el .ds Aq '

and you then have to change the template to quote as \*(Aq instead.


 Documentation/Makefile               |    3 +++
 Documentation/manpage-quote-apos.xsl |   16 ++++++++++++++++
 2 files changed, 19 insertions(+), 0 deletions(-)
 create mode 100644 Documentation/manpage-quote-apos.xsl

diff --git a/Documentation/Makefile b/Documentation/Makefile
index 06b0c57..68876d0 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -102,6 +102,9 @@ endif
 ifdef DOCBOOK_SUPPRESS_SP
 XMLTO_EXTRA += -m manpage-suppress-sp.xsl
 endif
+ifdef GNU_ROFF
+XMLTO_EXTRA += -m manpage-quote-apos.xsl
+endif
 
 SHELL_PATH ?= $(SHELL)
 # Shell quote;
diff --git a/Documentation/manpage-quote-apos.xsl b/Documentation/manpage-quote-apos.xsl
new file mode 100644
index 0000000..aeb8839
--- /dev/null
+++ b/Documentation/manpage-quote-apos.xsl
@@ -0,0 +1,16 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+		version="1.0">
+
+<!-- work around newer groff/man setups using a prettier apostrophe
+     that unfortunately does not quote anything when cut&pasting
+     examples to the shell -->
+<xsl:template name="escape.apostrophe">
+  <xsl:param name="content"/>
+  <xsl:call-template name="string.subst">
+    <xsl:with-param name="string" select="$content"/>
+    <xsl:with-param name="target">'</xsl:with-param>
+    <xsl:with-param name="replacement">\(aq</xsl:with-param>
+  </xsl:call-template>
+</xsl:template>
+
+</xsl:stylesheet>
-- 
1.6.5.1.144.g316236

^ permalink raw reply related

* [PATCH v2 2/2] filter-branch: nearest-ancestor rewriting outside subdir filter
From: Thomas Rast @ 2009-10-21 18:28 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <95535b01e2181d321190c6d93b2834188612a389.1256149428.git.trast@student.ethz.ch>

Since a0e4639 (filter-branch: fix ref rewriting with
--subdirectory-filter, 2008-08-12) git-filter-branch has done
nearest-ancestor rewriting when using a --subdirectory-filter.

However, that rewriting strategy is also a useful building block in
other tasks.  For example, if you want to split out a subset of files
from your history, you would typically call

  git filter-branch -- <refs> -- <files>

But this fails for all refs that do not point directly to a commit
that affects <files>, because their referenced commit will not be
rewritten and the ref remains untouched.

The code was already there for the --subdirectory-filter case, so just
introduce an option that enables it independently.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---

Evidently I shouldn't send any patches after dinner, or before, for
that matter (but for lack of wifi in the restaurant, *during* dinner
is not an option either).  Or at least I shouldn't re-read them after
sending :-(

v1 had a completely misplaced option parsing for the new option.
Very embarrassing.  Really.

I also sneak fixed the commit message above; you only need two -- if
you want rev-list options, e.g.,

  git filter-branch -- --all -- README

in which case the first one of course needs to appear before the first
<refs> argument.


 Documentation/git-filter-branch.txt |   13 ++++++++++++-
 git-filter-branch.sh                |    9 ++++++++-
 t/t7003-filter-branch.sh            |   18 ++++++++++++++++++
 3 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-filter-branch.txt b/Documentation/git-filter-branch.txt
index 2b40bab..394a77a 100644
--- a/Documentation/git-filter-branch.txt
+++ b/Documentation/git-filter-branch.txt
@@ -159,7 +159,18 @@ to other tags will be rewritten to point to the underlying commit.
 --subdirectory-filter <directory>::
 	Only look at the history which touches the given subdirectory.
 	The result will contain that directory (and only that) as its
-	project root.
+	project root.  Implies --remap-to-ancestor.
+
+--remap-to-ancestor::
+	Rewrite refs to the nearest rewritten ancestor instead of
+	ignoring them.
++
+Normally, positive refs on the command line are only changed if the
+commit they point to was rewritten.  However, you can limit the extent
+of this rewriting by using linkgit:rev-list[1] arguments, e.g., path
+limiters.  Refs pointing to such excluded commits would then normally
+be ignored.  With this option, they are instead rewritten to point at
+the nearest ancestor that was not excluded.
 
 --prune-empty::
 	Some kind of filters will generate empty commits, that left the tree
diff --git a/git-filter-branch.sh b/git-filter-branch.sh
index 3890c22..be36db4 100755
--- a/git-filter-branch.sh
+++ b/git-filter-branch.sh
@@ -125,6 +125,7 @@ filter_subdir=
 orig_namespace=refs/original/
 force=
 prune_empty=
+remap_to_ancestor=
 while :
 do
 	case "$1" in
@@ -137,6 +138,11 @@ do
 		force=t
 		continue
 		;;
+	--remap-to-ancestor)
+		shift
+		remap_to_ancestor=t
+		continue
+		;;
 	--prune-empty)
 		shift
 		prune_empty=t
@@ -182,6 +188,7 @@ do
 		;;
 	--subdirectory-filter)
 		filter_subdir="$OPTARG"
+		remap_to_ancestor=t
 		;;
 	--original)
 		orig_namespace=$(expr "$OPTARG/" : '\(.*[^/]\)/*$')/
@@ -364,7 +371,7 @@ done <../revs
 # revision walker.  Fix it by mapping these heads to the unique nearest
 # ancestor that survived the pruning.
 
-if test "$filter_subdir"
+if test "$remap_to_ancestor" = t
 then
 	while read ref
 	do
diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh
index 329c851..9503875 100755
--- a/t/t7003-filter-branch.sh
+++ b/t/t7003-filter-branch.sh
@@ -288,4 +288,22 @@ test_expect_success 'Prune empty commits' '
 	test_cmp expect actual
 '
 
+test_expect_success '--remap-to-ancestor with filename filters' '
+	git checkout master &&
+	git reset --hard A &&
+	test_commit add-foo foo 1 &&
+	git branch moved-foo &&
+	test_commit add-bar bar a &&
+	git branch invariant &&
+	orig_invariant=$(git rev-parse invariant) &&
+	git branch moved-bar &&
+	test_commit change-foo foo 2 &&
+	git filter-branch -f --remap-to-ancestor \
+		moved-foo moved-bar A..master \
+		-- -- foo &&
+	test $(git rev-parse moved-foo) = $(git rev-parse moved-bar) &&
+	test $(git rev-parse moved-foo) = $(git rev-parse master^) &&
+	test $orig_invariant = $(git rev-parse invariant)
+'
+
 test_done
-- 
1.6.5.1.142.g4bac9

^ permalink raw reply related

* [PATCH v2 1/2] filter-branch: stop special-casing $filter_subdir argument
From: Thomas Rast @ 2009-10-21 18:28 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <95535b01e2181d321190c6d93b2834188612a389.1256148512.git.trast@student.ethz.ch>

Handling $filter_subdir in the usual way requires a separate case at
every use, because the variable is empty when unused.  Furthermore,
the case for --subdirectory-filter supplies its own --, so the user
cannot provide one himself (though there is also very little point in
doing so).

Instead, tack the $filter_subdir onto $@ in the right place
automatically, and only use a -- if it was not already provided by the
user.

We set non_ref_args again after changing "$@"; the next patch wants to
use it again afterwards, so we better not leave a stale value in
there.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---

[Same as v1.]

This is preparatory for the next patch; introducing another 'case'
along the lines of the existing one annoyed me, so I went for this
instead.  I would greatly appreciate extra eyes on my use of 'eval'.
I originally expected this to work without eval, but apparently this
is how one does it.  Quoting rules in the shell are annoying.

Incidentally, the last hunk sneak fixes a previously unquoted use of
$ref that is my fault from back in a0e4639 (filter-branch: fix ref
rewriting with --subdirectory-filter, 2008-08-12).

 git-filter-branch.sh |   28 +++++++++++++++++++++-------
 1 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/git-filter-branch.sh b/git-filter-branch.sh
index a480d6f..3890c22 100755
--- a/git-filter-branch.sh
+++ b/git-filter-branch.sh
@@ -257,15 +257,29 @@ git read-tree || die "Could not seed the index"
 # map old->new commit ids for rewriting parents
 mkdir ../map || die "Could not create map/ directory"

+non_ref_args=$(git rev-parse --no-revs --sq "$@")
+dashdash=--
+for arg in "$non_ref_args"; do
+	if test arg = --; then
+		dashdash=
+		break
+	fi
+done
+
 case "$filter_subdir" in
 "")
-	git rev-list --reverse --topo-order --default HEAD \
-		--parents --simplify-merges "$@"
+	filter_subdir_sq=
 	;;
 *)
-	git rev-list --reverse --topo-order --default HEAD \
-		--parents --simplify-merges "$@" -- "$filter_subdir"
-esac > ../revs || die "Could not get the commits"
+	filter_subdir_sq=$(git rev-parse --sq-quote "$filter_subdir")
+esac
+
+eval "set -- \"\$@\" $dashdash $filter_subdir_sq"
+non_ref_args=$(git rev-parse --no-revs --sq "$@")
+
+git rev-list --reverse --topo-order --default HEAD \
+	--parents --simplify-merges "$@" \
+	> ../revs || die "Could not get the commits"
 commits=$(wc -l <../revs | tr -d " ")

 test $commits -eq 0 && die "Found nothing to rewrite"
@@ -356,8 +370,8 @@ then
 	do
 		sha1=$(git rev-parse "$ref"^0)
 		test -f "$workdir"/../map/$sha1 && continue
-		ancestor=$(git rev-list --simplify-merges -1 \
-				$ref -- "$filter_subdir")
+		ancestor=$(eval "git rev-list --simplify-merges " \
+				"-1 \"$ref\" $non_ref_args")
 		test "$ancestor" && echo $(map $ancestor) >> "$workdir"/../map/$sha1
 	done < "$tempdir"/heads
 fi
-- 
1.6.5.1.142.g4bac9

^ permalink raw reply related

* [PATCH 2/2] filter-branch: nearest-ancestor rewriting outside subdir filter
From: Thomas Rast @ 2009-10-21 18:16 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <95535b01e2181d321190c6d93b2834188612a389.1256148512.git.trast@student.ethz.ch>

Since a0e4639 (filter-branch: fix ref rewriting with
--subdirectory-filter, 2008-08-12) git-filter-branch has done
nearest-ancestor rewriting when using a --subdirectory-filter.

However, that rewriting strategy is also a useful building block in
other tasks.  For example, if you want to split out a subset of files
from your history, you would typically call

  git filter-branch <refs> -- -- <files>

But this fails for all refs that do not point directly to a commit
that affects <files>, because their referenced commit will not be
rewritten and the ref remains untouched.

The code was already there for the --subdirectory-filter case, so just
introduce an option that enables it independently.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---

This came up on IRC the other day (see a pattern?), when someone
wanted to split out the history for a single file, and first had to
point all relevant refs at the corresponding nearest relevant
ancestor.

I think we could even make this option the default if it wasn't for
backwards compatibility; after all, what use is rewriting the commits
if you do not get a ref pointing to them?


 Documentation/git-filter-branch.txt |   13 ++++++++++++-
 git-filter-branch.sh                |    7 ++++++-
 t/t7003-filter-branch.sh            |   18 ++++++++++++++++++
 3 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-filter-branch.txt b/Documentation/git-filter-branch.txt
index 2b40bab..394a77a 100644
--- a/Documentation/git-filter-branch.txt
+++ b/Documentation/git-filter-branch.txt
@@ -159,7 +159,18 @@ to other tags will be rewritten to point to the underlying commit.
 --subdirectory-filter <directory>::
 	Only look at the history which touches the given subdirectory.
 	The result will contain that directory (and only that) as its
-	project root.
+	project root.  Implies --remap-to-ancestor.
+
+--remap-to-ancestor::
+	Rewrite refs to the nearest rewritten ancestor instead of
+	ignoring them.
++
+Normally, positive refs on the command line are only changed if the
+commit they point to was rewritten.  However, you can limit the extent
+of this rewriting by using linkgit:rev-list[1] arguments, e.g., path
+limiters.  Refs pointing to such excluded commits would then normally
+be ignored.  With this option, they are instead rewritten to point at
+the nearest ancestor that was not excluded.
 
 --prune-empty::
 	Some kind of filters will generate empty commits, that left the tree
diff --git a/git-filter-branch.sh b/git-filter-branch.sh
index 3890c22..d18f82d 100755
--- a/git-filter-branch.sh
+++ b/git-filter-branch.sh
@@ -125,6 +125,7 @@ filter_subdir=
 orig_namespace=refs/original/
 force=
 prune_empty=
+remap_to_ancestor=
 while :
 do
 	case "$1" in
@@ -182,10 +183,14 @@ do
 		;;
 	--subdirectory-filter)
 		filter_subdir="$OPTARG"
+		remap_to_ancestor=t
 		;;
 	--original)
 		orig_namespace=$(expr "$OPTARG/" : '\(.*[^/]\)/*$')/
 		;;
+	--remap-to-ancestor)
+		remap_to_ancestor=t
+		;;
 	*)
 		usage
 		;;
@@ -364,7 +369,7 @@ done <../revs
 # revision walker.  Fix it by mapping these heads to the unique nearest
 # ancestor that survived the pruning.
 
-if test "$filter_subdir"
+if test "$remap_to_ancestor" = t
 then
 	while read ref
 	do
diff --git a/t/t7003-filter-branch.sh b/t/t7003-filter-branch.sh
index 329c851..9503875 100755
--- a/t/t7003-filter-branch.sh
+++ b/t/t7003-filter-branch.sh
@@ -288,4 +288,22 @@ test_expect_success 'Prune empty commits' '
 	test_cmp expect actual
 '
 
+test_expect_success '--remap-to-ancestor with filename filters' '
+	git checkout master &&
+	git reset --hard A &&
+	test_commit add-foo foo 1 &&
+	git branch moved-foo &&
+	test_commit add-bar bar a &&
+	git branch invariant &&
+	orig_invariant=$(git rev-parse invariant) &&
+	git branch moved-bar &&
+	test_commit change-foo foo 2 &&
+	git filter-branch -f --remap-to-ancestor \
+		moved-foo moved-bar A..master \
+		-- -- foo &&
+	test $(git rev-parse moved-foo) = $(git rev-parse moved-bar) &&
+	test $(git rev-parse moved-foo) = $(git rev-parse master^) &&
+	test $orig_invariant = $(git rev-parse invariant)
+'
+
 test_done
-- 
1.6.5.1.139.g12527

^ permalink raw reply related

* [PATCH 1/2] filter-branch: stop special-casing $filter_subdir argument
From: Thomas Rast @ 2009-10-21 18:16 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Handling $filter_subdir in the usual way requires a separate case at
every use, because the variable is empty when unused.  Furthermore,
the case for --subdirectory-filter supplies its own --, so the user
cannot provide one himself (though there is also very little point in
doing so).

Instead, tack the $filter_subdir onto $@ in the right place
automatically, and only use a -- if it was not already provided by the
user.

We set non_ref_args again after changing "$@"; the next patch wants to
use it again afterwards, so we better not leave a stale value in
there.

Signed-off-by: Thomas Rast <trast@student.ethz.ch>
---

This is preparatory for the next patch; introducing another 'case'
along the lines of the existing one annoyed me, so I went for this
instead.  I would greatly appreciate extra eyes on my use of 'eval'.
I originally expected this to work without eval, but apparently this
is how one does it.  Quoting rules in the shell are annoying.

Incidentally, the last hunk sneak fixes a previously unquoted use of
$ref that is my fault from back in a0e4639 (filter-branch: fix ref
rewriting with --subdirectory-filter, 2008-08-12).

 git-filter-branch.sh |   28 +++++++++++++++++++++-------
 1 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/git-filter-branch.sh b/git-filter-branch.sh
index a480d6f..3890c22 100755
--- a/git-filter-branch.sh
+++ b/git-filter-branch.sh
@@ -257,15 +257,29 @@ git read-tree || die "Could not seed the index"
 # map old->new commit ids for rewriting parents
 mkdir ../map || die "Could not create map/ directory"

+non_ref_args=$(git rev-parse --no-revs --sq "$@")
+dashdash=--
+for arg in "$non_ref_args"; do
+	if test arg = --; then
+		dashdash=
+		break
+	fi
+done
+
 case "$filter_subdir" in
 "")
-	git rev-list --reverse --topo-order --default HEAD \
-		--parents --simplify-merges "$@"
+	filter_subdir_sq=
 	;;
 *)
-	git rev-list --reverse --topo-order --default HEAD \
-		--parents --simplify-merges "$@" -- "$filter_subdir"
-esac > ../revs || die "Could not get the commits"
+	filter_subdir_sq=$(git rev-parse --sq-quote "$filter_subdir")
+esac
+
+eval "set -- \"\$@\" $dashdash $filter_subdir_sq"
+non_ref_args=$(git rev-parse --no-revs --sq "$@")
+
+git rev-list --reverse --topo-order --default HEAD \
+	--parents --simplify-merges "$@" \
+	> ../revs || die "Could not get the commits"
 commits=$(wc -l <../revs | tr -d " ")

 test $commits -eq 0 && die "Found nothing to rewrite"
@@ -356,8 +370,8 @@ then
 	do
 		sha1=$(git rev-parse "$ref"^0)
 		test -f "$workdir"/../map/$sha1 && continue
-		ancestor=$(git rev-list --simplify-merges -1 \
-				$ref -- "$filter_subdir")
+		ancestor=$(eval "git rev-list --simplify-merges " \
+				"-1 \"$ref\" $non_ref_args")
 		test "$ancestor" && echo $(map $ancestor) >> "$workdir"/../map/$sha1
 	done < "$tempdir"/heads
 fi
-- 
1.6.5.1.139.g12527

^ permalink raw reply related

* Re: keeping track of where a patch begins
From: Nicolas Pitre @ 2009-10-21 18:14 UTC (permalink / raw)
  To: E R; +Cc: git
In-Reply-To: <3a69fa7c0910210745r311cf18xf966f5c63650cde6@mail.gmail.com>

On Wed, 21 Oct 2009, E R wrote:

> What solutions have you come up with to either catch or prevent
> this from happening? Is it possible to determine what node a branch
> started from?

This can be determined by looking at the gitk output.

Also 'git merge-base' can give you that node, given the main branch and 
the topic branch.  See documentation about git-merge-base.

Then, if you need to move a branch to another starting node, 'git
rebase' is what you need (again, the git-rebase documentation is pretty
detailed).

Nicolas

^ permalink raw reply

* Re: [PATCH] Quote ' as \(aq in manpages
From: Anders Kaseorg @ 2009-10-21 18:01 UTC (permalink / raw)
  To: Thomas Rast; +Cc: git, Junio C Hamano
In-Reply-To: <ab31eb03b25272341b91e1f1132dab9d8a49e5b6.1256113282.git.trast@student.ethz.ch>

On Wed, 21 Oct 2009, Thomas Rast wrote:
> The docbook/xmlto toolchain insists on quoting ' as \'.  This does
> achieve the quoting goal, but modern 'man' implementations turn the
> apostrophe into a unicode "proper" apostrophe (given the right
> circumstances), breaking code examples in many of our manpages.
> 
> Quote them as \(aq instead, which is an "apostrophe quote" as per the
> groff_char manpage.

\(aq is not portable to non-GNU roff.  See
  http://bugs.debian.org/507673#65
  http://sourceforge.net/tracker/index.php?func=detail&aid=2412738&group_id=21935&atid=373747
for a proposed portable solution.

Anders

^ permalink raw reply

* Re: confusion with git diff-tree output
From: Jan Krüger @ 2009-10-21 17:51 UTC (permalink / raw)
  To: David Roundy; +Cc: git
In-Reply-To: <117f2cc80910211043q3a92a7b6o15464cc049ee33dc@mail.gmail.com>

> David Roundy <roundyd@physics.oregonstate.edu> wrote:

> I've been struggling with trying to figure out how to make diff-tree
> output the actual files changed.  Below is the output when I run
> diff-tree on a given commit.  It reports that the directory is
> modified, rather than that a single file within that directory is
> modified.

Tree objects are recursively nested, i.e.

> 66b67ea1763799c0b2ac01f6803177ca870f6544 M	Iolaus

is a reference to another tree object... and since a file in that
subtree changed, a new tree object that contains a different file
record is now referenced as "Iolaus".

By default git diff-tree doesn't recurse, but you can use -r for that.
Which is documented, I might add. ;)

Jan

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox