Git development
 help / color / mirror / Atom feed
* Re: ALSA official git repository
From: Linus Torvalds @ 2005-05-27 16:13 UTC (permalink / raw)
  To: Jaroslav Kysela; +Cc: LKML, Andrew Morton, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505271741490.1757@pnote.perex-int.cz>



On Fri, 27 May 2005, Jaroslav Kysela wrote:
> 
> 	I created new git tree for the ALSA project at:
> 
> rsync://rsync.kernel.org/pub/scm/linux/kernel/git/perex/alsa.git

Your scripts(?) to generate these things are a bit strange, since they
leave an extra empty line in the commit message, which confuses at least
gitweb (ie just look at

   http://www.kernel.org/git/?p=linux/kernel/git/perex/alsa.git;a=summary

and note how the summary thing looks empty).

Now, arguably gitweb should ignore whitespace at the beginning, but 
equally arguably your commits shouldn't have them either...

		Linus


^ permalink raw reply

* GIT and COGITO individual scripts hidding from PATH
From: Pavel Pisa @ 2005-05-27 16:24 UTC (permalink / raw)
  To: Petr Baudis, git
In-Reply-To: <20050524111333.GA11223@pasky.ji.cz>

Hello All,

I prefer to have as small number of executables in /usr/bin
and /user/local/bin as possible. I have tried to push original
Peter's git script to this direction for my strange personal taste.
COGITO has taken different direction than. I have hidden cogito and git
under wrapper to make me happy again. I thing that it is against good
opensource habits to keep changes secret even if I do not expect
interrest in this version. But may it be, that it can make
happy somebody with same taste as I have.

The "cgt" is wrapper for all cogito programs. If cogito is
build and installed to any prefix directory (/opt/git-cogito in my case)
and script is copied to bin subdirectory (/opt/git-cogito/bin),
then only single symbolic link from some searchable directory on PATH
is enough to call all cogito commands
  cgt pull
  cgt update
so usage is more convenient for my fingers learned to type cvs up, etc.
I think, that addition of intelligent bash completion could be done easier
if number of the first level commands to cover is smaller.

Best wishes

        Pavel Pisa

"cgt" script
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# The GIT scripted toolkit.
# Copyright (c) Petr Baudis, 2005
#
# Script to automatically wrap individual GIT commands under one script
# for those, who prefer only one or two links in /usr/local/bin
# This script added by Pavel Pisa <pisa@cmp.felk.cvut.cz>
#
# This command mostly only multiplexes to the individual scripts based
# on the first argument.

error () {
	echo cgt: $@ >&2
}
die () {
	error $@
	exit 1
}

if [ -z "$GIT_TOOLS_DIR" ] ; then
	GIT_MUXBINARY_DIR="$(dirname "$0")"
	if [ -h "$0" ]; then
		#GIT_TOOLS_DIR="$(ls -l "$0" | sed 's/^.*-> *\(.*\) *$/\1/')"
		GIT_TOOLS_DIR="$(readlink -f -n $0)"
	        GIT_TOOLS_DIR="$(dirname "$GIT_TOOLS_DIR")"
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; cd "$GIT_TOOLS_DIR" ; pwd )"
	else
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; pwd )"
	fi

	if [ -z "$(echo :$PATH: | sed -n -e 's#:'"$GIT_TOOLS_DIR"':#yes#p' )" ] ; 
then
		export PATH="$GIT_TOOLS_DIR:$PATH"
	fi

	export GIT_TOOLS_DIR
fi

#echo =$GIT_TOOLS_DIR
#echo PATH=$PATH
#echo CWD=$(pwd)
#exit 

help () {
	echo "cgt recognizes next commands:" >&2 
	(cd $GIT_TOOLS_DIR ; ls cg-* ) | sed -n -e 's/\<cg-\([^ ]*\)\>/\1/pg' >&2 
}


cmd=$1; shift

if [ ! "$cmd" ]; then
	error "missing command"
	help
	exit 1
fi

cgt_cmd_exe="${GIT_TOOLS_DIR}/cg-$cmd"

if [ ! -e "$cgt_cmd_exe" ]; then
	error "unrecognized command ${cmd}"
	help
	exit 1
fi

"$cgt_cmd_exe" "$@"
--------------------------------------------------------------------------------

"git" script
--------------------------------------------------------------------------------
#!/usr/bin/env bash
#
# The GIT scripted toolkit.
# Copyright (c) Petr Baudis, 2005
#
# Script to automatically wrap individual GIT commands under one script
# for those, who prefer only one or two links in /usr/local/bin
# This script added by Pavel Pisa <pisa@cmp.felk.cvut.cz>
#
# This command mostly only multiplexes to the individual scripts based
# on the first argument.

error () {
	echo git: $@ >&2
}
die () {
	error $@
	exit 1
}

if [ -z "$GIT_TOOLS_DIR" ] ; then
	GIT_MUXBINARY_DIR="$(dirname "$0")"
	if [ -h "$0" ]; then
		#GIT_TOOLS_DIR="$(ls -l "$0" | sed 's/^.*-> *\(.*\) *$/\1/')"
		GIT_TOOLS_DIR="$(readlink -f -n $0)"
	        GIT_TOOLS_DIR="$(dirname "$GIT_TOOLS_DIR")"
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; cd "$GIT_TOOLS_DIR" ; pwd )"
	else
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; pwd )"
	fi

	if [ -z "$(echo :$PATH: | sed -n -e 's#:'"$GIT_TOOLS_DIR"':#yes#p' )" ] ; 
then
		export PATH="$GIT_TOOLS_DIR:$PATH"
	fi

	export GIT_TOOLS_DIR
fi

#echo =$GIT_TOOLS_DIR
#echo PATH=$PATH
#echo CWD=$(pwd)
#exit 

help () {
	echo "git recognizes next commands:" >&2 
	(cd $GIT_TOOLS_DIR ; ls git-* ) | sed -n -e 's/\<git-\([^ ]*\)\>/\1/pg' >&2 
}


cmd=$1; shift

if [ ! "$cmd" ]; then
	error "missing command"
	help
	exit 1
fi

git_cmd_exe="${GIT_TOOLS_DIR}/git-$cmd"

if [ ! -e "$git_cmd_exe" ]; then
	error "unrecognized command ${cmd}"
	help
	exit 1
fi

"$git_cmd_exe" "$@"
--------------------------------------------------------------------------------


^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: H. Peter Anvin @ 2005-05-27 16:25 UTC (permalink / raw)
  To: Markus F.X.J. Oberhumer; +Cc: git
In-Reply-To: <42971EB4.2050403@oberhumer.com>

Markus F.X.J. Oberhumer wrote:
> This trivial patch fixes an obvious ptrdiff_t vs. int mismatch. Which
> makes we wonder why Linus isn't hitting this on his ppc64 - maybe it's
> time to start using -Werror...

This doesn't affect CPUs with a register-based calling convention.

	-hpa

^ permalink raw reply

* Re: GIT and COGITO individual scripts hidding from PATH
From: Pavel Pisa @ 2005-05-27 16:29 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git
In-Reply-To: <200505271824.05105.pisa@cmp.felk.cvut.cz>

Excuse me for resending of e-mail again, but I have forget
to disable wrapping of lines. Sending as attachmet would be
more secure, but I do not know, if it is convenient
on the git list.

The "cgt" is wrapper for all cogito programs. If cogito is
build and installed to any prefix directory (/opt/git-cogito in my case)
and script is copied to bin subdirectory (/opt/git-cogito/bin),
then only single symbolic link from some searchable directory on PATH
is enough to call all cogito commands
  cgt pull
  cgt update
so usage is more convenient for my fingers learned to type cvs up, etc.
I think, that addition of intelligent bash completion could be done easier
if number of the first level commands to cover is smaller.

"cgt" script
---------------------------------------------------------------------------
#!/usr/bin/env bash
#
# The GIT scripted toolkit.
# Copyright (c) Petr Baudis, 2005
#
# Script to automatically wrap individual GIT commands under one script
# for those, who prefer only one or two links in /usr/local/bin
# This script added by Pavel Pisa <pisa@cmp.felk.cvut.cz>
#
# This command mostly only multiplexes to the individual scripts based
# on the first argument.

error () {
	echo cgt: $@ >&2
}
die () {
	error $@
	exit 1
}

if [ -z "$GIT_TOOLS_DIR" ] ; then
	GIT_MUXBINARY_DIR="$(dirname "$0")"
	if [ -h "$0" ]; then
		#GIT_TOOLS_DIR="$(ls -l "$0" | sed 's/^.*-> *\(.*\) *$/\1/')"
		GIT_TOOLS_DIR="$(readlink -f -n $0)"
	        GIT_TOOLS_DIR="$(dirname "$GIT_TOOLS_DIR")"
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; cd "$GIT_TOOLS_DIR" ; pwd )"
	else
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; pwd )"
	fi

	if [ -z "$(echo :$PATH: | sed -n -e 's#:'"$GIT_TOOLS_DIR"':#yes#p' )" ] ; then
		export PATH="$GIT_TOOLS_DIR:$PATH"
	fi

	export GIT_TOOLS_DIR
fi

#echo =$GIT_TOOLS_DIR
#echo PATH=$PATH
#echo CWD=$(pwd)
#exit 

help () {
	echo "cgt recognizes next commands:" >&2 
	(cd $GIT_TOOLS_DIR ; ls cg-* ) | sed -n -e 's/\<cg-\([^ ]*\)\>/\1/pg' >&2 
}


cmd=$1; shift

if [ ! "$cmd" ]; then
	error "missing command"
	help
	exit 1
fi

cgt_cmd_exe="${GIT_TOOLS_DIR}/cg-$cmd"

if [ ! -e "$cgt_cmd_exe" ]; then
	error "unrecognized command ${cmd}"
	help
	exit 1
fi

"$cgt_cmd_exe" "$@"
---------------------------------------------------------------------------

"git" script
---------------------------------------------------------------------------
#!/usr/bin/env bash
#
# The GIT scripted toolkit.
# Copyright (c) Petr Baudis, 2005
#
# Script to automatically wrap individual GIT commands under one script
# for those, who prefer only one or two links in /usr/local/bin
# This script added by Pavel Pisa <pisa@cmp.felk.cvut.cz>
#
# This command mostly only multiplexes to the individual scripts based
# on the first argument.

error () {
	echo git: $@ >&2
}
die () {
	error $@
	exit 1
}

if [ -z "$GIT_TOOLS_DIR" ] ; then
	GIT_MUXBINARY_DIR="$(dirname "$0")"
	if [ -h "$0" ]; then
		#GIT_TOOLS_DIR="$(ls -l "$0" | sed 's/^.*-> *\(.*\) *$/\1/')"
		GIT_TOOLS_DIR="$(readlink -f -n $0)"
	        GIT_TOOLS_DIR="$(dirname "$GIT_TOOLS_DIR")"
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; cd "$GIT_TOOLS_DIR" ; pwd )"
	else
		GIT_TOOLS_DIR="$(cd "$GIT_MUXBINARY_DIR" ; pwd )"
	fi

	if [ -z "$(echo :$PATH: | sed -n -e 's#:'"$GIT_TOOLS_DIR"':#yes#p' )" ] ; then
		export PATH="$GIT_TOOLS_DIR:$PATH"
	fi

	export GIT_TOOLS_DIR
fi

#echo =$GIT_TOOLS_DIR
#echo PATH=$PATH
#echo CWD=$(pwd)
#exit 

help () {
	echo "git recognizes next commands:" >&2 
	(cd $GIT_TOOLS_DIR ; ls git-* ) | sed -n -e 's/\<git-\([^ ]*\)\>/\1/pg' >&2 
}


cmd=$1; shift

if [ ! "$cmd" ]; then
	error "missing command"
	help
	exit 1
fi

git_cmd_exe="${GIT_TOOLS_DIR}/git-$cmd"

if [ ! -e "$git_cmd_exe" ]; then
	error "unrecognized command ${cmd}"
	help
	exit 1
fi

"$git_cmd_exe" "$@"
---------------------------------------------------------------------------

^ permalink raw reply

* Re: ALSA official git repository
From: Sean @ 2005-05-27 17:00 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Jaroslav Kysela, LKML, Andrew Morton, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505270903230.17402@ppc970.osdl.org>

[-- Attachment #1: Type: text/plain, Size: 822 bytes --]

On Fri, May 27, 2005 12:13 pm, Linus Torvalds said:
> On Fri, 27 May 2005, Jaroslav Kysela wrote:
>>
>> 	I created new git tree for the ALSA project at:
>>
>> rsync://rsync.kernel.org/pub/scm/linux/kernel/git/perex/alsa.git
>
> Your scripts(?) to generate these things are a bit strange, since they
> leave an extra empty line in the commit message, which confuses at least
> gitweb (ie just look at
>
>    http://www.kernel.org/git/?p=linux/kernel/git/perex/alsa.git;a=summary
>
> and note how the summary thing looks empty).
>
> Now, arguably gitweb should ignore whitespace at the beginning, but
> equally arguably your commits shouldn't have them either...
>

Perhaps git should enforce this?  Patch attached.


Remove leading empty lines from commit messages.

Signed-off-by: Sean Estabrooks <seanlkml@sympatico.ca>


[-- Attachment #2: trim_leading_commit_ws.patch --]
[-- Type: text/plain, Size: 1133 bytes --]

--- raw/commit-tree.c	2005-05-26 23:38:30.000000000 -0400
+++ argp2/commit-tree.c	2005-05-27 12:46:54.000000000 -0400
@@ -90,6 +90,18 @@
 	free(buf);
 }
 
+static int whitespace(const char *msg) 
+{
+	while (*msg) 
+		switch (*msg) {
+		case ' ': case '\t': case '\n': case '\r':
+			msg++; break;
+		default:
+			return 0;
+		}
+	return 1;
+}
+
 /*
  * Having more than two parents is not strange at all, and this is
  * how multi-way merges are represented.
@@ -112,7 +124,7 @@
 	char comment[1000];
 	struct passwd *pw;
 	char *buffer;
-	unsigned int size;
+	unsigned int size, csize;
 
 	if (argc < 2 || get_sha1_hex(argv[1], tree_sha1) < 0)
 		usage(commit_tree_usage);
@@ -174,8 +186,10 @@
 	add_buffer(&buffer, &size, "committer %s <%s> %s\n\n", commitgecos, commitemail, realdate);
 
 	/* And add the comment */
+	csize = size;
 	while (fgets(comment, sizeof(comment), stdin) != NULL)
-		add_buffer(&buffer, &size, "%s", comment);
+		if (size > csize || ! whitespace(comment))
+			add_buffer(&buffer, &size, "%s", comment);
 
 	write_sha1_file(buffer, size, "commit", commit_sha1);
 	printf("%s\n", sha1_to_hex(commit_sha1));

^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: Linus Torvalds @ 2005-05-27 17:18 UTC (permalink / raw)
  To: Markus F.X.J. Oberhumer; +Cc: git
In-Reply-To: <42971EB4.2050403@oberhumer.com>



On Fri, 27 May 2005, Markus F.X.J. Oberhumer wrote:
>
> This trivial patch fixes an obvious ptrdiff_t vs. int mismatch. Which
> makes we wonder why Linus isn't hitting this on his ppc64 - maybe it's
> time to start using -Werror...

My _kernel_ is 64-bit, but my user-space is all 32-bit because (a) I don't
care about user-space and (b) I'm lazy and (c)  all the distributions
contain just 32-bit programs and a ppc64 kernel has no trouble running
32-bit programs.

I can compile a kernel with "-m64", but since I don't have any 64-bit
libraries installed, user space doesn't work that well ;)

		Linus

^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: Linus Torvalds @ 2005-05-27 17:25 UTC (permalink / raw)
  To: Markus F.X.J. Oberhumer; +Cc: git
In-Reply-To: <Pine.LNX.4.58.0505271013260.17402@ppc970.osdl.org>



On Fri, 27 May 2005, Linus Torvalds wrote:
> 
> I can compile a kernel with "-m64", but since I don't have any 64-bit
> libraries installed, user space doesn't work that well ;)

Btw, since this was the piece of code that I didn't bother simplifying 
last time it was discussed (then it was just "ugly"), I took a different 
approach instead, and committed the following..

		Linus

---
diff-tree 84c1afd7e7c69c6c3c0677d5ee01925d4c70d318 (from a9c9cef161b26ca610783dd0b180d18956c7b119)
Author: Linus Torvalds <torvalds@ppc970.osdl.org>
Date:   Fri May 27 10:22:09 2005 -0700
    
    git-diff-tree: simplify header output with '-z'
    
    No need to make them multiple lines, in fact we explicitly don't want that.
    
    This also fixes a 64-bit problem pointed out by Markus F.X.J. Oberhumer,
    where we gave "%.*s" a "ptrdiff_t" length argument instead of an "int".

diff --git a/diff-tree.c b/diff-tree.c
--- a/diff-tree.c
+++ b/diff-tree.c
@@ -269,18 +269,11 @@ static int call_diff_flush(void)
 		return 0;
 	}
 	if (header) {
-		if (diff_output_format == DIFF_FORMAT_MACHINE) {
-			const char *ep, *cp;
-			for (cp = header; *cp; cp = ep) {
-				ep = strchr(cp, '\n');
-				if (ep == 0) ep = cp + strlen(cp);
-				printf("%.*s%c", ep-cp, cp, 0);
-				if (*ep) ep++;
-			}
-		}
-		else {
-			printf("%s", header);
-		}
+		const char *fmt = "%s";
+		if (diff_output_format == DIFF_FORMAT_MACHINE)
+			fmt = "%s%c";
+		
+		printf(fmt, header, 0);
 		header = NULL;
 	}
 	diff_flush(diff_output_format, 1);

^ permalink raw reply

* Re: ALSA official git repository
From: Linus Torvalds @ 2005-05-27 17:28 UTC (permalink / raw)
  To: Sean; +Cc: Jaroslav Kysela, LKML, Andrew Morton, Git Mailing List
In-Reply-To: <3516.10.10.10.24.1117213207.squirrel@linux1>



On Fri, 27 May 2005, Sean wrote:
> >
> > Now, arguably gitweb should ignore whitespace at the beginning, but
> > equally arguably your commits shouldn't have them either...
> 
> Perhaps git should enforce this?  Patch attached.
> 
> Remove leading empty lines from commit messages.
> 
> Signed-off-by: Sean Estabrooks <seanlkml@sympatico.ca>

I'm not sure.

The thing is, right now git allows binary commit messages if somebody
really wants to. Now, a lot of the _tools_ end up only printing up to the
first '\0' or something, but in general, maybe somebody actually wants to
embed his own strange stuff in there (eg use encryption but still use
standard git tools).

Which makes me worry. So I _do_ do whitespace cleanup in my "apply email 
patches" scripts, but I'm not sure whether the core should care about the 
data that people feed it, even for commit messages.

Opinions?

		Linus

^ permalink raw reply

* Re: BEWARE: mkdelta is broken
From: Nicolas Pitre @ 2005-05-27 17:37 UTC (permalink / raw)
  To: Thomas Glanzmann; +Cc: git
In-Reply-To: <20050527094133.GR24351@cip.informatik.uni-erlangen.de>

On Fri, 27 May 2005, Thomas Glanzmann wrote:

> Hello,
> 
> > The current delta loop detection logic is broken.  So if you have 
> > multiple merged branches or you have a changeset that revert things then 
> > you might end up with a delta loop and fsck-cache will effectively 
> > complain about unresolved deltas and assorted dangling/broken object 
> > links.
> 
> I wanted to give you heads-up, but I forgot it. But I think noone is
> using it at the moment otherwise they had complained in the first place.
> 
> However ... I took linux-2.6 repository. Ran git-deltafy-script &&
> git-deltafy-script -d 0 and it segfaulted on me. Same for my mutt-cvs
> import.

Yes, that's because you end up with delta loops.  When trying to expand 
the delta, it keeps looping and allocating memory until killed.

I fixed that already and am preparing a patch at the moment.


Nicolas

^ permalink raw reply

* Re: BEWARE: mkdelta is broken
From: Thomas Glanzmann @ 2005-05-27 17:42 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: git
In-Reply-To: <Pine.LNX.4.62.0505271335170.16151@localhost.localdomain>

Hello,

> Yes, that's because you end up with delta loops.  When trying to expand 
> the delta, it keeps looping and allocating memory until killed.

> I fixed that already and am preparing a patch at the moment.

perfekt. I am happy to give it a test.

	Thomas

^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: Markus F.X.J. Oberhumer @ 2005-05-27 17:42 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.58.0505271024280.17402@ppc970.osdl.org>

Linus Torvalds wrote:
> 
> On Fri, 27 May 2005, Linus Torvalds wrote:
> 
>>I can compile a kernel with "-m64", but since I don't have any 64-bit
>>libraries installed, user space doesn't work that well ;)
> 
> 
> Btw, since this was the piece of code that I didn't bother simplifying 
> last time it was discussed (then it was just "ugly"), I took a different 
> approach instead, and committed the following..

Many thanks. BTW, this is probably completely irrelevant on all 
real-world machines and calling conventions, but still, lying on the 
number of arguments in vararg-functions is not my favourite practice.

~Markus

> 
> 		Linus
> 
> ---
> diff-tree 84c1afd7e7c69c6c3c0677d5ee01925d4c70d318 (from a9c9cef161b26ca610783dd0b180d18956c7b119)
> Author: Linus Torvalds <torvalds@ppc970.osdl.org>
> Date:   Fri May 27 10:22:09 2005 -0700
>     
>     git-diff-tree: simplify header output with '-z'
>     
>     No need to make them multiple lines, in fact we explicitly don't want that.
>     
>     This also fixes a 64-bit problem pointed out by Markus F.X.J. Oberhumer,
>     where we gave "%.*s" a "ptrdiff_t" length argument instead of an "int".
> 
> diff --git a/diff-tree.c b/diff-tree.c
> --- a/diff-tree.c
> +++ b/diff-tree.c
> @@ -269,18 +269,11 @@ static int call_diff_flush(void)
>  		return 0;
>  	}
>  	if (header) {
> -		if (diff_output_format == DIFF_FORMAT_MACHINE) {
> -			const char *ep, *cp;
> -			for (cp = header; *cp; cp = ep) {
> -				ep = strchr(cp, '\n');
> -				if (ep == 0) ep = cp + strlen(cp);
> -				printf("%.*s%c", ep-cp, cp, 0);
> -				if (*ep) ep++;
> -			}
> -		}
> -		else {
> -			printf("%s", header);
> -		}
> +		const char *fmt = "%s";
> +		if (diff_output_format == DIFF_FORMAT_MACHINE)
> +			fmt = "%s%c";
> +		
> +		printf(fmt, header, 0);
>  		header = NULL;
>  	}
>  	diff_flush(diff_output_format, 1);
> -
> To unsubscribe from this list: send the line "unsubscribe git" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 


-- 
Markus Oberhumer, <markus@oberhumer.com>, http://www.oberhumer.com/

^ permalink raw reply

* Re: ALSA official git repository
From: Jaroslav Kysela @ 2005-05-27 17:43 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: LKML, Andrew Morton, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505270903230.17402@ppc970.osdl.org>

On Fri, 27 May 2005, Linus Torvalds wrote:

> On Fri, 27 May 2005, Jaroslav Kysela wrote:
> > 
> > 	I created new git tree for the ALSA project at:
> > 
> > rsync://rsync.kernel.org/pub/scm/linux/kernel/git/perex/alsa.git
> 
> Your scripts(?) to generate these things are a bit strange, since they
> leave an extra empty line in the commit message, which confuses at least
> gitweb (ie just look at
> 
>    http://www.kernel.org/git/?p=linux/kernel/git/perex/alsa.git;a=summary
> 
> and note how the summary thing looks empty).

Okay, sorry for this small bug. I'll recreate the ALSA git tree with
proper comments again. Also, the author is not correct (should be taken
from the first Signed-off-by:).

						Jaroslav

-----
Jaroslav Kysela <perex@suse.cz>
Linux Kernel Sound Maintainer
ALSA Project, SUSE Labs

^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: H. Peter Anvin @ 2005-05-27 17:58 UTC (permalink / raw)
  To: Markus F.X.J. Oberhumer; +Cc: Linus Torvalds, git
In-Reply-To: <42975C1F.8070102@oberhumer.com>

Markus F.X.J. Oberhumer wrote:
> 
> Many thanks. BTW, this is probably completely irrelevant on all 
> real-world machines and calling conventions, but still, lying on the 
> number of arguments in vararg-functions is not my favourite practice.
> 

Bullshit.

Think about the way stdarg works, and you should quickly figure out that 
it is perfectly safe to pass extra arguments.

However, this code can also be rewritten:

	fputs(header, stdout);
	if (diff_output_format == DIFF_FORMAT_MACHINE)
		putchar('\0');

... which is probably faster anyway.

	-hpa

^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: Linus Torvalds @ 2005-05-27 18:13 UTC (permalink / raw)
  To: Markus F.X.J. Oberhumer; +Cc: git
In-Reply-To: <42975C1F.8070102@oberhumer.com>



On Fri, 27 May 2005, Markus F.X.J. Oberhumer wrote:
> 
> Many thanks. BTW, this is probably completely irrelevant on all 
> real-world machines and calling conventions, but still, lying on the 
> number of arguments in vararg-functions is not my favourite practice.

Only using a subset is guaranteed to work - it's not about "real-world 
machines", it's about the fact that it's not a C compiler any more if it 
doesn't work.

		Linus

^ permalink raw reply

* Re: ALSA official git repository
From: Linus Torvalds @ 2005-05-27 18:16 UTC (permalink / raw)
  To: Jaroslav Kysela; +Cc: LKML, Andrew Morton, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505271941250.1757@pnote.perex-int.cz>



On Fri, 27 May 2005, Jaroslav Kysela wrote:
> 
> Okay, sorry for this small bug. I'll recreate the ALSA git tree with
> proper comments again. Also, the author is not correct (should be taken
> from the first Signed-off-by:).

Hmm.. That's not always true in general, since Sign-off does allow to sign
off on other peoples patches (see the "(b)" clause in DCO), but maybe in
the ALSA tree it is.

Are you coming from a CVS tree or what? It's clearly not my patch 
applicator thing, since that one removes spaces, I'm pretty sure.

		Linus

^ permalink raw reply

* Re: [PATCH] ls-tree path restriction semantics fixes
From: Junio C Hamano @ 2005-05-27 18:16 UTC (permalink / raw)
  To: Jason McMullan; +Cc: git
In-Reply-To: <20050527120851.GA11823@port.evillabs.net>

>>>>> "JM" == Jason McMullan <jason.mcmullan@timesys.com> writes:

JM> This patch fixes the git-ls-tree semantics to be less stupid, namely:
JM> 	* ls of a 'tree' path should just return the SHA1 of the tree
JM> 	* ls of a 'tree' path with a trailing '/' should work properly
JM> 	* ls of two identical paths should have the same output as ls of
JM> 	  a single path. (I consider ls-tree's output to be a hash dictionary)

I haven't read your code yet, but...

JM> Old Results:

JM> 	$ git-ls-tree t
JM> 	040000 tree 4eeb3990955b8badc4c14712b89d8cd9fff02f15    t
JM> 	100644 blob 6882e23be568ccf14f3adb0c766139086f2ee952    t/Makefile
JM> 	100644 blob 2a94fdb0b83ab5fcbf1a2c6edaf36c2dbe765ec6    t/README
JM> 	100644 blob d920c6b3a3bfbb5994244a78d1ad99ce02748122    t/lib-read-tree-m-3way.sh
JM> 	...

I presume the counterpart to this one in your "New Results"
example, which is spelled "git-ls-tree f" is a typo of
"git-ls-tree t", but if that is the case I strongly disagree.

What you really want is something similar to '-d' flag to
/bin/ls.  You are interested in the directory itself not its
contents and I think your gripe is that giving a path that
matches a tree always descends into it (i.e. there is no way to
do the equivalent of "/bin/ls -d t").  I agree that it is a
problem, but changing "/bin/ls t" not to show the directory
contents of "t" is not a solution.

JM> 	$ git-ls-tree t/
JM> 	(no output)

I agree with you that this is not what we want and we should
behave the same way as "git-ls-tree t" would in this case.

JM> 	$ git-ls-tree t t
JM> 	040000 tree 4eeb3990955b8badc4c14712b89d8cd9fff02f15    t

I do not know what you wanted to say in this example.  Your
"Old" and "New" look the same to me.






^ permalink raw reply

* Re: [PATCH] Fix ptrdiff_t vs. int
From: Junio C Hamano @ 2005-05-27 18:20 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Markus F.X.J. Oberhumer, git
In-Reply-To: <Pine.LNX.4.58.0505271024280.17402@ppc970.osdl.org>

>>>>> "LT" == Linus Torvalds <torvalds@osdl.org> writes:

LT> On Fri, 27 May 2005, Linus Torvalds wrote:
>> 
>> I can compile a kernel with "-m64", but since I don't have any 64-bit
>> libraries installed, user space doesn't work that well ;)

LT> Btw, since this was the piece of code that I didn't bother simplifying 
LT> last time it was discussed (then it was just "ugly"), I took a different 
LT> approach instead, and committed the following..

I thought about this after getting your "Your solution is the
yucky one" message, but make sure that you are not adding an
extra blank line when this is combined with diff-helper.  I
think you currently do.


^ permalink raw reply

* Re: [PATCH] Diff updates, fixing pathspec and rename/copy interaction.
From: Junio C Hamano @ 2005-05-27 18:22 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505270848220.17402@ppc970.osdl.org>

>>>>> "LT" == Linus Torvalds <torvalds@osdl.org> writes:

LT> I would much prefer a _much_ simpler fix at least for the pathname part,
LT> which is to just always require that pathspec handling is done _first_.

LT> Why? Because that's fundamentally how git-diff-tree has to work, and it's
LT> how my mental model has always been: the path limitations are a
LT> first-order filter, and if you give a directory, the end result should
LT> always look exactly as if that directory was a project of its own.

Enlightment!

My initial thinking was to do rename/copy first, deliberately
ignoring pathspec (I was thinking about tweaking diff-tree to
break out of its built-in pathspec limit to feed diffcore
_everything_, even things outside of the specified directories
when --detect-copies-harder is used), to cast a wider net so
that it can come up with "the best matches".  But what you just
said made me realize that the definition of "the best matches" I
had in mind was not best at all.  I agree with you 100% that
pathspec should come first before everything else to limit the
world diffcore operates in, not just to limit the set of output
paths.

I still want to do what this patch does for another reason.  It
is so much simpler if I do not have to carry around _some_
unmatched pair after rename/copy.  The earlier representation
was forcing all the downstream diffcore filters to be aware of
what rename/copy did, which was simply _wrong_.  They should not
have to care.

About ordering of pickaxe and rename/copy, I think for most uses
of diffcore filters, not limited to pickaxe, would make more
sense if they come after rename/copy when rename/copy detection
is in effect.  What I really wanted to do (I made a side note
comment in the earlier discussion about this, saying "something
like streams", but have not pursued it further) is to make these
diffcore filters/transformations stackable so that the main
programs can control not just if each of them is used or not,
but the order of application of the used ones.  I cannot offhand
think of a good use case of having pickaxe come _before_
rename/copy, but there may be a case the user want to have
things in that order, and having the application order not
hardcoded in three diff-* brothers is a major problem if we
wanted to give that option.


^ permalink raw reply

* Re: ALSA official git repository
From: Junio C Hamano @ 2005-05-27 18:47 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Sean, Jaroslav Kysela, LKML, Andrew Morton, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505271026020.17402@ppc970.osdl.org>

>>>>> "LT" == Linus Torvalds <torvalds@osdl.org> writes:

LT> On Fri, 27 May 2005, Sean wrote:
>> >
>> > Now, arguably gitweb should ignore whitespace at the beginning, but
>> > equally arguably your commits shouldn't have them either...
>> 
>> Perhaps git should enforce this?  Patch attached.
>> 
>> Remove leading empty lines from commit messages.
>> 
>> Signed-off-by: Sean Estabrooks <seanlkml@sympatico.ca>

LT> I'm not sure.
LT> Opinions?

Porcelains and gitweb should play with each other nicely, but
the core should _not_ care by default.

An extra option ("--text", perhaps) to git-commit-tree is
acceptable to me, and it may be even a good thing to have.  It
would make life a bit easiear for Porcelain writers if nothing
else.  If that is to happen, I would say we could do more than
just leading blank line removal.  We can also remove trailing
blanks before each LF, tabify indented log message contents, and
remove empty lines before EOF.


^ permalink raw reply

* More gitweb queries..
From: Linus Torvalds @ 2005-05-27 19:24 UTC (permalink / raw)
  To: Kay Sievers, Git Mailing List


Kay,
 just a few more quickie suggestions if you don't mind..

 - looking around, the ALSA guys aren't the only ones that start off with 
   an empty line, so it's probably worth fixing the summary etc to ignore 
   whitespace at the beginning rather than give empty summary reasons.

 - any reason to limit the "summary" page to just the last 14 changes? The 
   "log" thing you can ask to go back further, it would be nice to have
   something like a "last 100" thing for summaries too, especially since 
   the summary is so nice and dense, so you can actually get a nice view 
   of what has happened without scrolling _too_ much.

 - I was in the "commitdiff" thing, and initially thought that there was 
   no way to get back to the "summary" view.

   It turns out I was wrong (the summary is reachable by just clicking at
   the project name itself in the top header), but it's a bit strange that
   the "commitdiff" thing has an explicit link back to itself (hey, 
   consistency is good, so I'm not complaining), but the link back to the
   summary page is implicit.

   So how about adding an explicit "summary" link to the list of other 
   explicit links (log, commit, commitdiff and tree) at the top of the 
   page?

I actually like browsing other peoples projects with the gitweb
interfaces, it's both responsive and verbose enough to really say
something good. In contrast, cvsweb is always just a mess of "these are
the files, go at it", which is totally pointless and doesn't tell anything
about what is actually happening in the project.

So dammit, I'm very biased indeed, but I'm just looking at gitweb, and
comparing it to both the CVS and SVN web things, and they just reinforce
my conviction that CVS is absolute crap, and I find myself surprised by
how crap SVN also appears.

[ In other words, while I have all these stupid requests for you, I 
  think gitweb is just _way_ better and more useful than something like
  viewcvs (or cvsweb). So don't mind my small gripes, I only have them 
  because I like this thing.

  On that small note, I also find "gitk" very cool indeed, too bad about 
  the fact that tk/tcl seems to always end up looking so _ugly_. Is there 
  any way to get anti-aliased fonts and a less 'Motify' blocky look from 
  tcl/tk? Every time I see that, I feel like I'm back in the last century  
  or something.

  Combining some of the features of the two (that über-cool revision 
  history graph from gitk rules, for example) might be cool. I get the 
  urge to do octopus-merges in the kernel just because of how good they
  look in gitk ;) ]

			Linus

^ permalink raw reply

* Re: [PATCH] ls-tree path restriction semantics fixes
From: Jason McMullan @ 2005-05-27 19:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vmzqgzg8a.fsf@assigned-by-dhcp.cox.net>

[-- Attachment #1: Type: text/plain, Size: 1147 bytes --]

On Fri, 2005-05-27 at 11:16 -0700, Junio C Hamano wrote:
> What you really want is something similar to '-d' flag to
> /bin/ls.  You are interested in the directory itself not its
> contents and I think your gripe is that giving a path that
> matches a tree always descends into it (i.e. there is no way to
> do the equivalent of "/bin/ls -d t").  I agree that it is a
> problem, but changing "/bin/ls t" not to show the directory
> contents of "t" is not a solution.

  git-ls-tree reporting just the tree's hash is valid, because if
you want everything in that tree, you can just do:

git-ls-tree `git-ls-tree HEAD path/dir | (read m t h n; echo $h)`

  I don't see the problem there.


> JM> 	$ git-ls-tree t t
> JM> 	040000 tree 4eeb3990955b8badc4c14712b89d8cd9fff02f15    t
> 
> I do not know what you wanted to say in this example.  Your
> "Old" and "New" look the same to me.

The problem was that 't' and 't t' produced *vastly* different output
in the old code. 't' would emit everything in the tree, and 't t' would
only emit t's hash.

-- 
Jason McMullan <jason.mcmullan@timesys.com>
TimeSys Corporation


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply

* Re: More gitweb queries..
From: Thomas Glanzmann @ 2005-05-27 19:29 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Kay Sievers, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505271145570.17402@ppc970.osdl.org>

Hello,

> I get the urge to do octopus-merges in the kernel just because of how
> good they look in gitk ;) ]

talking about octopus-merges ... I don't understand how they work. What
happens if one file is touched in every of the 8 trees. How can that be
handled?

	Thomas

^ permalink raw reply

* Re: More gitweb queries..
From: Junio C Hamano @ 2005-05-27 19:32 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Kay Sievers, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505271145570.17402@ppc970.osdl.org>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=iso-2022-jp-2, Size: 376 bytes --]

>>>>> "LT" == Linus Torvalds <torvalds@osdl.org> writes:

LT>   Combining some of the features of the two (that ^[.A^[N|ber-cool revision 
LT>   history graph from gitk rules, for example) might be cool. I get the 
LT>   urge to do octopus-merges in the kernel just because of how good they
LT>   look in gitk ;) ]

Hey, Octopus is what you explicitly told me not to do ;-).




^ permalink raw reply

* Re: More gitweb queries..
From: Thomas Glanzmann @ 2005-05-27 19:31 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Kay Sievers, Git Mailing List
In-Reply-To: <Pine.LNX.4.58.0505271145570.17402@ppc970.osdl.org>

Hello,

>  - looking around, the ALSA guys aren't the only ones that start off with 
>    an empty line, so it's probably worth fixing the summary etc to ignore 
>    whitespace at the beginning rather than give empty summary reasons.

for me the formatting of an empty commit comment screw up the layout. I
don't know if it already is fixed, if not, I will soon report a patch.

	Thomas

^ permalink raw reply

* [PATCH] mkdelta enhancements (take 2)
From: Nicolas Pitre @ 2005-05-27 19:29 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git


Although it was described as such, git-mkdelta didn't really attempt to
find the best delta against any previous object in the list but was only
able to create a delta against the preceeding object.  This patch 
reworks the code to fix that limitation and hopefully makes it a bit
clearer than before.

This means that

        git-mkdelta sha1 sha2 sha3 sha4 sha5 sha6

will now create a sha2 delta against sha1, a sha3 delta against either
sha2 or sha1 and keep the best one, a sha4 delta against either sha3,
sha2 or sha1, etc.  The --max-behind argument limits that search for the
best delta to the specified number of previous objects in the list.  If
no limit is specified it is unlimited (note: it might run out of 
memory with long object lists).

Also added a -q (quiet) switch so it is possible to have 3 levels of
output: -q for nothing, -v for verbose, and if none of -q nor -v is
specified then only actual changes on the object database are shown.

Finally the git-deltafy-script has been updated accordingly, and some 
bugs fixed (thanks to Stephen C. Tweedie for spotting them).

This version has been toroughly tested and I think it might be ready
for public consumption.

Signed-off-by: Nicolas Pitre <nico@cam.org>

diff --git a/git-deltafy-script b/git-deltafy-script
old mode 100644
new mode 100755
--- a/git-deltafy-script
+++ b/git-deltafy-script
@@ -1,40 +1,67 @@
 #!/bin/bash
 
-# Script to deltafy an entire GIT repository based on the commit list.
+# Example script to deltafy an entire GIT repository based on the commit list.
 # The most recent version of a file is the reference and previous versions
 # are made delta against the best earlier version available. And so on for
-# successive versions going back in time.  This way the delta overhead is
-# pushed towards older version of any given file.
-#
-# NOTE: the "best earlier version" is not implemented in mkdelta yet
-#       and therefore only the next eariler version is used at this time.
-#
-# TODO: deltafy tree objects as well.
+# successive versions going back in time.  This way the increasing delta
+# overhead is pushed towards older versions of any given file.
 #
 # The -d argument allows to provide a limit on the delta chain depth.
-# If 0 is passed then everything is undeltafied.
+# If 0 is passed then everything is undeltafied.  Limiting the delta
+# depth is meaningful for subsequent access performance to old revisions.
+# A value of 16 might be a good compromize between performance and good
+# space saving.  Current default is unbounded.
+#
+# The --max-behind=30 argument is passed to git-mkdelta so to keep
+# combinations and memory usage bounded a bit.  If you have lots of memory
+# and CPU power you may remove it (or set to 0) to let git-mkdelta find the
+# best delta match regardless of the number of revisions for a given file.
+# You can also make the value smaller to make it faster and less
+# memory hungry.  A value of 5 ought to still give pretty good results.
+# When set to 0 or ommitted then look behind is unbounded.  Note that
+# git-mkdelta might die with a segmentation fault in that case if it
+# runs out of memory.  Note that the GIT repository will still be consistent
+# even if git-mkdelta dies unexpectedly.
 
 set -e
 
 depth=
 [ "$1" == "-d" ] && depth="--max-depth=$2" && shift 2
 
+function process_list() {
+	if [ "$list" ]; then
+		echo "Processing $curr_file"
+		echo "$head $list" | xargs git-mkdelta $depth --max-behind=30 -v
+	fi
+}
+
 curr_file=""
 
 git-rev-list HEAD |
-git-diff-tree -r --stdin |
-awk '/^:/ { if ($5 == "M" || $5 == "N") print $4, $6 }' |
+git-diff-tree -r -t --stdin |
+awk '/^:/ { if ($5 == "M" || $5 == "N") print $4, $6;
+            if ($5 == "M") print $3, $6 }' |
 LC_ALL=C sort -s -k 2 | uniq |
 while read sha1 file; do
 	if [ "$file" == "$curr_file" ]; then
 		list="$list $sha1"
 	else
-		if [ "$list" ]; then
-			echo "Processing $curr_file"
-			echo "$head $list" | xargs git-mkdelta $depth -v
-		fi
+		process_list
 		curr_file="$file"
 		list=""
 		head="$sha1"
 	fi
 done
+process_list
+
+curr_file="root directory"
+head=""
+list="$(
+	git-rev-list HEAD |
+	while read commit; do
+		git-cat-file commit $commit |
+		sed -n 's/tree //p;Q'
+	done
+	)"
+process_list
+
diff --git a/mkdelta.c b/mkdelta.c
--- a/mkdelta.c
+++ b/mkdelta.c
@@ -98,21 +98,16 @@ static void *create_delta_object(char *b
 	return create_object(buf, len, hdr, hdrlen, size);
 }
 
-static unsigned long get_object_size(unsigned char *sha1)
-{
-	struct stat st;
-	if (stat(sha1_file_name(sha1), &st))
-		die("%s: %s", sha1_to_hex(sha1), strerror(errno));
-	return st.st_size;
-}
-
-static void *get_buffer(unsigned char *sha1, char *type, unsigned long *size)
+static void *get_buffer(unsigned char *sha1, char *type,
+			unsigned long *size, unsigned long *compsize)
 {
 	unsigned long mapsize;
 	void *map = map_sha1_file(sha1, &mapsize);
 	if (map) {
 		void *buffer = unpack_sha1_file(map, mapsize, type, size);
 		munmap(map, mapsize);
+		if (compsize)
+			*compsize = mapsize;
 		if (buffer)
 			return buffer;
 	}
@@ -120,198 +115,246 @@ static void *get_buffer(unsigned char *s
 	return NULL;
 }
 
-static void *expand_delta(void *delta, unsigned long delta_size, char *type,
-			  unsigned long *size, unsigned int *depth, char *head)
+static void *expand_delta(void *delta, unsigned long *size, char *type,
+			  unsigned int *depth, unsigned char **links)
 {
 	void *buf = NULL;
-	*depth++;
-	if (delta_size < 20) {
+	unsigned int level = (*depth)++;
+	if (*size < 20) {
 		error("delta object is bad");
 		free(delta);
 	} else {
 		unsigned long ref_size;
-		void *ref = get_buffer(delta, type, &ref_size);
+		void *ref = get_buffer(delta, type, &ref_size, NULL);
 		if (ref && !strcmp(type, "delta"))
-			ref = expand_delta(ref, ref_size, type, &ref_size,
-					   depth, head);
-		else
-			memcpy(head, delta, 20);
-		if (ref)
-			buf = patch_delta(ref, ref_size, delta+20,
-					  delta_size-20, size);
-		free(ref);
+			ref = expand_delta(ref, &ref_size, type, depth, links);
+		else if (ref)
+{
+			*links = xmalloc(*depth * 20);
+}
+		if (ref) {
+			buf = patch_delta(ref, ref_size, delta+20, *size-20, size);
+			free(ref);
+			if (buf)
+				memcpy(*links + level*20, delta, 20);
+			else
+				free(*links);
+		}
 		free(delta);
 	}
 	return buf;
 }
 
 static char *mkdelta_usage =
-"mkdelta [ --max-depth=N ] <reference_sha1> <target_sha1> [ <next_sha1> ... ]";
+"mkdelta [--max-depth=N] [--max-behind=N] <reference_sha1> <target_sha1> [<next_sha1> ...]";
 
+struct delta {
+	unsigned char sha1[20];		/* object sha1 */
+	unsigned long size;		/* object size */
+	void *buf;			/* object content */
+	unsigned char *links;		/* delta reference links */
+	unsigned int depth;		/* delta depth */
+};
+	
 int main(int argc, char **argv)
 {
-	unsigned char sha1_ref[20], sha1_trg[20], head_ref[20], head_trg[20];
-	char type_ref[20], type_trg[20];
-	void *buf_ref, *buf_trg, *buf_delta;
-	unsigned long size_ref, size_trg, size_orig, size_delta;
-	unsigned int depth_ref, depth_trg, depth_max = -1;
-	int i, verbose = 0;
+	struct delta *ref, trg;
+	char ref_type[20], trg_type[20], *skip_reason;
+	void *best_buf;
+	unsigned long best_size, orig_size, orig_compsize;
+	unsigned int r, orig_ref, best_ref, nb_refs, next_ref, max_refs = 0;
+	unsigned int i, duplicate, skip_lvl, verbose = 0, quiet = 0;
+	unsigned int max_depth = -1;
 
 	for (i = 1; i < argc; i++) {
 		if (!strcmp(argv[i], "-v")) {
 			verbose = 1;
+			quiet = 0;
+		} else if (!strcmp(argv[i], "-q")) {
+			quiet = 1;
+			verbose = 0;
 		} else if (!strcmp(argv[i], "-d") && i+1 < argc) {
-			depth_max = atoi(argv[++i]);
+			max_depth = atoi(argv[++i]);
 		} else if (!strncmp(argv[i], "--max-depth=", 12)) {
-			depth_max = atoi(argv[i]+12);
+			max_depth = atoi(argv[i]+12);
+		} else if (!strcmp(argv[i], "-b") && i+1 < argc) {
+			max_refs = atoi(argv[++i]);
+		} else if (!strncmp(argv[i], "--max-behind=", 13)) {
+			max_refs = atoi(argv[i]+13);
 		} else
 			break;
 	}
 
-	if (i + (depth_max != 0) >= argc)
+	if (i + (max_depth != 0) >= argc)
 		usage(mkdelta_usage);
 
-	if (get_sha1(argv[i], sha1_ref))
-		die("bad sha1 %s", argv[i]);
-	depth_ref = 0;
-	buf_ref = get_buffer(sha1_ref, type_ref, &size_ref);
-	if (buf_ref && !strcmp(type_ref, "delta"))
-		buf_ref = expand_delta(buf_ref, size_ref, type_ref,
-				       &size_ref, &depth_ref, head_ref);
-	else
-		memcpy(head_ref, sha1_ref, 20);
-	if (!buf_ref)
-		die("unable to obtain initial object %s", argv[i]);
-
-	if (depth_ref > depth_max) {
-		if (restore_original_object(buf_ref, size_ref, type_ref, sha1_ref))
-			die("unable to restore %s", argv[i]);
-		if (verbose)
-			printf("undelta %s (depth was %d)\n", argv[i], depth_ref);
-		depth_ref = 0;
-	}
-
-	/*
-	 * TODO: deltafication should be tried against any early object
-	 * in the object list and not only the previous object.
-	 */
+	if (!max_refs || max_refs > argc - i)
+		max_refs = argc - i;
+	ref = xmalloc(max_refs * sizeof(*ref));
+	for (r = 0; r < max_refs; r++)
+		ref[r].buf = ref[r].links = NULL;
+	next_ref = nb_refs = 0;
 
-	while (++i < argc) {
-		if (get_sha1(argv[i], sha1_trg))
+	do {
+		if (get_sha1(argv[i], trg.sha1))
 			die("bad sha1 %s", argv[i]);
-		depth_trg = 0;
-		buf_trg = get_buffer(sha1_trg, type_trg, &size_trg);
-		if (buf_trg && !size_trg) {
+		trg.buf = get_buffer(trg.sha1, trg_type, &trg.size, &orig_compsize);
+		if (trg.buf && !trg.size) {
 			if (verbose)
 				printf("skip    %s (object is empty)\n", argv[i]);
 			continue;
 		}
-		size_orig = size_trg;
-		if (buf_trg && !strcmp(type_trg, "delta")) {
-			if (!memcmp(buf_trg, sha1_ref, 20)) {
-				/* delta already in place */
-				depth_ref++;
-				memcpy(sha1_ref, sha1_trg, 20);
-				buf_ref = patch_delta(buf_ref, size_ref,
-						      buf_trg+20, size_trg-20,
-						      &size_ref);
-				if (!buf_ref)
-					die("unable to apply delta %s", argv[i]);
-				if (depth_ref > depth_max) {
-					if (restore_original_object(buf_ref, size_ref,
-								    type_ref, sha1_ref))
-						die("unable to restore %s", argv[i]);
-					if (verbose)
-						printf("undelta %s (depth was %d)\n", argv[i], depth_ref);
-					depth_ref = 0;
-					continue;
-				}
-				if (verbose)
-					printf("skip    %s (delta already in place)\n", argv[i]);
-				continue;
+		orig_size = trg.size;
+		orig_ref = -1;
+		trg.depth = 0;
+		trg.links = NULL;
+		if (trg.buf && !strcmp(trg_type, "delta")) {
+			for (r = 0; r < nb_refs; r++)
+				if (!memcmp(trg.buf, ref[r].sha1, 20))
+					break;
+			if (r < nb_refs) {
+				/* no need to reload the reference object */
+				trg.depth = ref[r].depth + 1;
+				trg.links = xmalloc(trg.depth*20);
+				memcpy(trg.links, trg.buf, 20);
+				memcpy(trg.links+20, ref[r].links, ref[r].depth*20);
+				trg.buf = patch_delta(ref[r].buf, ref[r].size,
+						      trg.buf+20, trg.size-20,
+						      &trg.size);
+				strcpy(trg_type, ref_type);
+				orig_ref = r;
+			} else {
+				trg.buf = expand_delta(trg.buf, &trg.size, trg_type,
+						       &trg.depth, &trg.links);
 			}
-			buf_trg = expand_delta(buf_trg, size_trg, type_trg,
-					       &size_trg, &depth_trg, head_trg);
-		} else
-			memcpy(head_trg, sha1_trg, 20);
-		if (!buf_trg)
-			die("unable to read target object %s", argv[i]);
-
-		if (depth_trg > depth_max) {
-			if (restore_original_object(buf_trg, size_trg, type_trg, sha1_trg))
-				die("unable to restore %s", argv[i]);
-			if (verbose)
-				printf("undelta %s (depth was %d)\n", argv[i], depth_trg);
-			depth_trg = 0;
-			size_orig = size_trg;
 		}
+		if (!trg.buf)
+			die("unable to read target object %s", argv[i]);
 
-		if (depth_max == 0)
-			goto skip;
-
-		if (strcmp(type_ref, type_trg))
+		if (!nb_refs) {
+			strcpy(ref_type, trg_type);
+		} else if (max_depth && strcmp(ref_type, trg_type)) {
 			die("type mismatch for object %s", argv[i]);
-
-		if (!size_ref) {
-			if (verbose)
-				printf("skip    %s (initial object is empty)\n", argv[i]);
-			goto skip;
-		}
-		
-		if (depth_ref + 1 > depth_max) {
-			if (verbose)
-				printf("skip    %s (exceeding max link depth)\n", argv[i]);
-			goto skip;
 		}
 
-		if (!memcmp(head_ref, sha1_trg, 20)) {
-			if (verbose)
-				printf("skip    %s (would create a loop)\n", argv[i]);
-			goto skip;
+		duplicate = 0;
+		best_buf = NULL;
+		best_size = -1;
+		best_ref = -1;
+		skip_lvl = 0;
+		skip_reason = NULL;
+		for (r = 0; max_depth && r < nb_refs; r++) {
+			void *delta_buf, *comp_buf;
+			unsigned long delta_size, comp_size;
+			unsigned int l;
+
+			duplicate = !memcmp(trg.sha1, ref[r].sha1, 20);
+			if (duplicate) {
+				skip_reason = "already seen";
+				break;
+			}
+			if (ref[r].depth >= max_depth) {
+				if (skip_lvl < 1) {
+					skip_reason = "exceeding max link depth";
+					skip_lvl = 1;
+				}
+				continue;
+			}
+			for (l = 0; l < ref[r].depth; l++)
+				if (!memcmp(trg.sha1, ref[r].links + l*20, 20))
+					break;
+			if (l != ref[r].depth) {
+				if (skip_lvl < 2) {
+					skip_reason = "would create a loop";
+					skip_lvl = 2;
+				}
+				continue;
+			}
+			if (trg.depth < max_depth && r == orig_ref) {
+				if (skip_lvl < 3) {
+					skip_reason = "delta already in place";
+					skip_lvl = 3;
+				}
+				continue;
+			}
+			delta_buf = diff_delta(ref[r].buf, ref[r].size,
+					       trg.buf, trg.size, &delta_size);
+			if (!delta_buf)
+				die("out of memory");
+			if (trg.depth < max_depth &&
+			    delta_size+20 >= orig_size) {
+				/* no need to even try to compress if original
+				   object is smaller than this delta */
+				free(delta_buf);
+				if (skip_lvl < 4) {
+					skip_reason = "no size reduction";
+					skip_lvl = 4;
+				}
+				continue;
+			}
+			comp_buf = create_delta_object(delta_buf, delta_size,
+						       ref[r].sha1, &comp_size);
+			if (!comp_buf)
+				die("out of memory");
+			free(delta_buf);
+			if (trg.depth < max_depth &&
+			    comp_size >= orig_compsize) {
+				free(comp_buf);
+				if (skip_lvl < 5) {
+					skip_reason = "no size reduction";
+					skip_lvl = 5;
+				}
+				continue;
+			}
+			if ((comp_size < best_size) ||
+			    (comp_size == best_size &&
+			     ref[r].depth < ref[best_ref].depth)) {
+				free(best_buf);
+				best_buf = comp_buf;
+				best_size = comp_size;
+				best_ref = r;
+			}
 		}
 
-		buf_delta = diff_delta(buf_ref, size_ref, buf_trg, size_trg, &size_delta);
-		if (!buf_delta)
-			die("out of memory");
-
-		/* no need to even try to compress if original
-		   uncompressed is already smaller */
-		if (size_delta+20 < size_orig) {
-			void *buf_obj;
-			unsigned long size_obj;
-			buf_obj = create_delta_object(buf_delta, size_delta,
-						      sha1_ref, &size_obj);
-			free(buf_delta);
-			size_orig = get_object_size(sha1_trg);
-			if (size_obj >= size_orig) {
-				free(buf_obj);
-				if (verbose)
-					printf("skip    %s (original is smaller)\n", argv[i]);
-				goto skip;
-			}
-			if (replace_object(buf_obj, size_obj, sha1_trg))
+		if (best_buf) {
+			if (replace_object(best_buf, best_size, trg.sha1))
 				die("unable to write delta for %s", argv[i]);
-			free(buf_obj);
-			depth_ref++;
-			if (verbose)
-				printf("delta   %s (size=%ld.%02ld%%, depth=%d)\n",
-				       argv[i], size_obj*100 / size_orig,
-				       (size_obj*10000 / size_orig)%100,
-				       depth_ref);
-		} else {
-			free(buf_delta);
-			if (verbose)
-				printf("skip    %s (original is smaller)\n", argv[i]);
-			skip:
-			depth_ref = depth_trg;
-			memcpy(head_ref, head_trg, 20);
+			free(best_buf);
+			free(trg.links);
+			trg.depth = ref[best_ref].depth + 1;
+			trg.links = xmalloc(trg.depth*20);
+			memcpy(trg.links, ref[best_ref].sha1, 20);
+			memcpy(trg.links+20, ref[best_ref].links, ref[best_ref].depth*20);
+			if (!quiet)
+				printf("delta   %s (size=%ld.%02ld%% depth=%d dist=%d)\n",
+				       argv[i], best_size*100 / orig_compsize,
+				       (best_size*10000 / orig_compsize)%100,
+				       trg.depth,
+				       (next_ref - best_ref + max_refs)
+				       % (max_refs + 1) + 1);
+		} else if (trg.depth > max_depth) {
+			if (restore_original_object(trg.buf, trg.size, trg_type, trg.sha1))
+				die("unable to restore %s", argv[i]);
+			if (!quiet)
+				printf("undelta %s (depth was %d)\n",
+				       argv[i], trg.depth);
+			trg.depth = 0;
+			free(trg.links);
+			trg.links = NULL;
+		} else if (skip_reason && verbose) {
+			printf("skip    %s (%s)\n", argv[i], skip_reason);
 		}
 
-		free(buf_ref);
-		buf_ref = buf_trg;
-		size_ref = size_trg;
-		memcpy(sha1_ref, sha1_trg, 20);
-	}
+		if (!duplicate) {
+			free(ref[next_ref].buf);
+			free(ref[next_ref].links);
+			ref[next_ref] = trg;
+			if (++next_ref > nb_refs)
+				nb_refs = next_ref;
+			if (next_ref == max_refs)
+				next_ref = 0;
+		}
+	} while (++i < argc);
 
 	return 0;
 }

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox