Git development
 help / color / mirror / Atom feed
* Re: Cygwin can't handle huge packfiles?
From: Alex Riesen @ 2006-04-03 14:38 UTC (permalink / raw)
  To: Kees-Jan Dijkzeul; +Cc: git
In-Reply-To: <fa0b6e200604030246q21fccb9ar93004ac67d8b28b3@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1189 bytes --]

On 4/3/06, Kees-Jan Dijkzeul <k.j.dijkzeul@gmail.com> wrote:
> I'm trying to get Git to manage a 5Gb source tree. Under linux, this
> works like a charm. Under cygwin, however, I run in to difficulties.
> For example:
>
> $ git-clone sgp-wa/ sgp-wa.clone
> fatal: packfile
> ./objects/pack/pack-56aa013a0234e198467ed37ae5db925764a6ee98.pack
> cannot be mapped.
> fatal: unexpected EOF
> fetch-pack from '/cygdrive/e/Projects/sgp-wa/.git' failed.
>
> To figure out what is happening, I printed the value of errno, which
> turns out to be 12 (Cannot allocate memory). I'm not sure how mmap is

mmap in git on cygwin does not mmaps anything,
but just reads the whole file in memory.

> I'm not sure how to approach this problem. Any tips would be greatly
> appreciated.

I ended up hacking gitfakemmap like in the attached patches (sorry for mime).
It's very ugly and unsafe hack, and it's actually exactly the reason why it was
never submitted. Still, it helps me (it speedups revlist, for
instance), and maybe
it'll help you.
It is a really good example what stupid windows restrictions can do to
a program.

The patch is against git as of 3-Apr-2005, ~10 CET

[-- Attachment #2: cygmmap.patch --]
[-- Type: text/x-patch, Size: 5710 bytes --]

diff --git a/Makefile b/Makefile
index c79d646..8a46436
--- a/Makefile
+++ b/Makefile
@@ -389,7 +389,7 @@ ifdef NO_SETENV
 endif
 ifdef NO_MMAP
 	COMPAT_CFLAGS += -DNO_MMAP
-	COMPAT_OBJS += compat/mmap.o
+	COMPAT_OBJS += compat/mmap.o compat/realmmap.o
 endif
 ifdef NO_IPV6
 	ALL_CFLAGS += -DNO_IPV6
diff --git a/compat/realmmap.c b/compat/realmmap.c
new file mode 100644
index 0000000..8f26641
--- /dev/null
+++ b/compat/realmmap.c
@@ -0,0 +1,26 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include "../git-compat-util.h"
+
+#undef mmap
+#undef munmap
+
+void *realmmap(void *start, size_t length, int prot , int flags, int fd, off_t offset)
+{
+	if (start != NULL || !(flags & MAP_PRIVATE)) {
+		errno = ENOTSUP;
+		return MAP_FAILED;
+	}
+	start = mmap(start, length, prot, flags, fd, offset);
+	return start;
+}
+
+int realmunmap(void *start, size_t length)
+{
+	return munmap(start, length);
+}
+
+
diff --git a/diff.c b/diff.c
index e496905..f1a2cf0 100644
--- a/diff.c
+++ b/diff.c
@@ -450,7 +450,7 @@ int diff_populate_filespec(struct diff_f
 		fd = open(s->path, O_RDONLY);
 		if (fd < 0)
 			goto err_empty;
-		s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
+		s->data = realmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
 		close(fd);
 		if (s->data == MAP_FAILED)
 			goto err_empty;
@@ -482,7 +482,7 @@ void diff_free_filespec_data(struct diff
 	if (s->should_free)
 		free(s->data);
 	else if (s->should_munmap)
-		munmap(s->data, s->size);
+		realmunmap(s->data, s->size);
 	s->should_free = s->should_munmap = 0;
 	s->data = NULL;
 	free(s->cnt_data);
diff --git a/git-compat-util.h b/git-compat-util.h
index 5d543d2..85150f8 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -42,22 +42,28 @@ extern int error(const char *err, ...) _
 
 #ifdef NO_MMAP
 
-#ifndef PROT_READ
+#include <sys/mman.h>
+/*#ifndef PROT_READ
 #define PROT_READ 1
 #define PROT_WRITE 2
 #define MAP_PRIVATE 1
 #define MAP_FAILED ((void*)-1)
-#endif
+#endif*/
 
 #define mmap gitfakemmap
 #define munmap gitfakemunmap
 extern void *gitfakemmap(void *start, size_t length, int prot , int flags, int fd, off_t offset);
 extern int gitfakemunmap(void *start, size_t length);
 
+extern void *realmmap(void *start, size_t length, int prot , int flags, int fd, off_t offset);
+extern int realmunmap(void *start, size_t length);
+
 #else /* NO_MMAP */
 
 #include <sys/mman.h>
 
+#define realmmap mmap
+#define realmunmap munmap
 #endif /* NO_MMAP */
 
 #ifdef NO_SETENV
diff --git a/sha1_file.c b/sha1_file.c
index 58edec0..712a068 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -330,14 +330,14 @@ void prepare_alt_odb(void)
 		close(fd);
 		return;
 	}
-	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	map = realmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 	if (map == MAP_FAILED)
 		return;
 
 	link_alt_odb_entries(map, map + st.st_size, '\n',
 			     get_object_directory());
-	munmap(map, st.st_size);
+	realmunmap(map, st.st_size);
 }
 
 static char *find_sha1_file(const unsigned char *sha1, struct stat *st)
@@ -378,7 +378,7 @@ static int check_packed_git_idx(const ch
 		return -1;
 	}
 	idx_size = st.st_size;
-	idx_map = mmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	idx_map = realmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 	if (idx_map == MAP_FAILED)
 		return -1;
@@ -423,7 +423,7 @@ static int unuse_one_packed_git(void)
 	}
 	if (!lru)
 		return 0;
-	munmap(lru->pack_base, lru->pack_size);
+	realmunmap(lru->pack_base, lru->pack_size);
 	lru->pack_base = NULL;
 	return 1;
 }
@@ -460,7 +460,7 @@ int use_packed_git(struct packed_git *p)
 		}
 		if (st.st_size != p->pack_size)
 			die("packfile %s size mismatch.", p->pack_name);
-		map = mmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
+		map = realmmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, fd, 0);
 		close(fd);
 		if (map == MAP_FAILED)
 			die("packfile %s cannot be mapped.", p->pack_name);
@@ -494,7 +494,7 @@ struct packed_git *add_packed_git(char *
 	/* do we have a corresponding .pack file? */
 	strcpy(path + path_len - 4, ".pack");
 	if (stat(path, &st) || !S_ISREG(st.st_mode)) {
-		munmap(idx_map, idx_size);
+		realmunmap(idx_map, idx_size);
 		return NULL;
 	}
 	/* ok, it looks sane as far as we can check without
@@ -647,7 +647,7 @@ static void *map_sha1_file_internal(cons
 		 */
 		sha1_file_open_flag = 0;
 	}
-	map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	map = realmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 	if (map == MAP_FAILED)
 		return NULL;
@@ -1184,7 +1184,7 @@ int sha1_object_info(const unsigned char
 			*sizep = size;
 	}
 	inflateEnd(&stream);
-	munmap(map, mapsize);
+	realmunmap(map, mapsize);
 	return status;
 }
 
@@ -1210,7 +1210,7 @@ void * read_sha1_file(const unsigned cha
 	map = map_sha1_file_internal(sha1, &mapsize);
 	if (map) {
 		buf = unpack_sha1_file(map, mapsize, type, size);
-		munmap(map, mapsize);
+		realmunmap(map, mapsize);
 		return buf;
 	}
 	return NULL;
@@ -1493,7 +1493,7 @@ int write_sha1_to_fd(int fd, const unsig
 	} while (posn < objsize);
 
 	if (map)
-		munmap(map, objsize);
+		realmunmap(map, objsize);
 	if (temp_obj)
 		free(temp_obj);
 
@@ -1646,7 +1646,7 @@ int index_fd(unsigned char *sha1, int fd
 
 	buf = "";
 	if (size)
-		buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+		buf = realmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
 	close(fd);
 	if (buf == MAP_FAILED)
 		return -1;
@@ -1660,7 +1660,7 @@ int index_fd(unsigned char *sha1, int fd
 		ret = 0;
 	}
 	if (size)
-		munmap(buf, size);
+		realmunmap(buf, size);
 	return ret;
 }
 


^ permalink raw reply related

* Re: parsecvs tool now creates git repositories
From: Keith Packard @ 2006-04-03 14:37 UTC (permalink / raw)
  To: Erik Mouw; +Cc: keithp, Git Mailing List
In-Reply-To: <20060403140348.GE16823@harddisk-recovery.com>

[-- Attachment #1: Type: text/plain, Size: 1030 bytes --]

On Mon, 2006-04-03 at 16:03 +0200, Erik Mouw wrote:
> On Sat, Apr 01, 2006 at 09:36:28PM -0800, Keith Packard wrote:
> > The UI is a total disaster, sufficient for testing. You must create an
> > Authors file in the current directory which looks like the git-cvsimport
> > authors file. You must also have a edit-change-log program in your path
> > which edits the commit message in place. /bin/true will work if you
> > don't need to edit the messages.
> > 
> > I should clearly steal the existing git-cvsimport command line arguments
> > and use those.
> 
> What is the current way to use it? I get the impression it reads raw ,v
> files, but how do I get along with a remote CVS repository?

You can't. You need to create a local copy of the repository. There is a
tool which can do that using the cvs protocol, but I don't recall the
name.

It turns out that parsing the ,v files directly is both faster and more
accurate than attempting to interpret the output of cvs log.

-- 
keith.packard@intel.com

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 191 bytes --]

^ permalink raw reply

* Re: Cygwin can't handle huge packfiles?
From: Linus Torvalds @ 2006-04-03 14:36 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Kees-Jan Dijkzeul, git
In-Reply-To: <Pine.LNX.4.64.0604030730040.3781@g5.osdl.org>



On Mon, 3 Apr 2006, Linus Torvalds wrote:
> 
> That said, I think git _does_ have problems with large pack-files. We have 
> some 32-bit issues etc

I should clarify that. git _itself_ shouldn't have any 32-bit issues, but 
the packfile data structure does. The index has 32-bit offsets into 
individual pack-files. 

That's not hugely fundamental, but I didn't expect people to hit it this 
quickly. What kind of project has a 1.5GB pack-file _already_? I hope it's 
fifteen years of history (so that we'll have another fifteen years before 
we'll have to worry about 4GB pack-files ;)

			Linus

^ permalink raw reply

* Re: Cygwin can't handle huge packfiles?
From: Linus Torvalds @ 2006-04-03 14:33 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Kees-Jan Dijkzeul, git
In-Reply-To: <Pine.LNX.4.63.0604031521170.4011@wbgn013.biozentrum.uni-wuerzburg.de>



On Mon, 3 Apr 2006, Johannes Schindelin wrote:
> 
> The problem is not mmap() on cygwin, but that a fork() has to jump through 
> loops to reinstall the open file descriptors on cygwin. If the 
> corresponding file was deleted, that fails. Therefore, we work around that 
> on cygwin by actually reading the file into memory, *not* mmap()ing it.

Well, we could actually do a _real_ mmap on pack-files. The pack-files are 
much better mmap'ed - there we don't _want_ them to be removed while we're 
using them. It was the index file etc that was problematic.

Maybe the cygwin fake mmap should be triggered only for the index (and 
possibly the individual objects - if only because there doing a 
malloc+read may actually be faster).

Using malloc+read on pack-files is pretty wasteful, since we usually only 
use a very small part of them (ie if we have a 1.5GB pack-file, it's sad 
to read all of it, when we'd usually actually access just a small small 
fraction of it).

That said, I think git _does_ have problems with large pack-files. We have 
some 32-bit issues etc, and just virtual address space things. So for now, 
it's probably best to limit pack-files to the few-hundred-meg size, and 
create serveral smaller ones rather than one huge one.

		Linus

^ permalink raw reply

* Re: n-heads and patch dependency chains
From: Linus Torvalds @ 2006-04-03 14:29 UTC (permalink / raw)
  To: Sam Vilain; +Cc: Git Mailing List
In-Reply-To: <4430D352.4010707@vilain.net>



On Mon, 3 Apr 2006, Sam Vilain wrote:
> 
> "Patch dependency chains", the best plain-English term we could find for
> the scary sounding darcs term "patch calculus", are said by some to be a
> very good reason to use a system like darcs, even to some its
> fundamental advantage over systems such as git.

I really think that darcs is just "nice theory", and has nothing to do 
with real life.

Terms like "patch calculus" may sound cool and useful, but let's face it, 
the proof is in the pudding, and quite frankly, I would bet that trying to 
do something like that on a real project would just be a total disaster.

I want my merges simple. I want them to take a couple of seconds. I don't 
want to see strange patch dependencies that nobody cares about. And I very 
much don't want to see theory over practice.

		Linus

^ permalink raw reply

* Re: Cygwin can't handle huge packfiles?
From: Morten Welinder @ 2006-04-03 14:26 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Kees-Jan Dijkzeul, git
In-Reply-To: <Pine.LNX.4.63.0604031521170.4011@wbgn013.biozentrum.uni-wuerzburg.de>

> The problem is not mmap() on cygwin, but that a fork() has to jump through
> loops to reinstall the open file descriptors on cygwin. If the
> corresponding file was deleted, that fails. Therefore, we work around that
> on cygwin by actually reading the file into memory, *not* mmap()ing it.

Maybe, but you aren't going to be able to handler much bigger packs
even on *nix.  Unless you go 64-bit, that is.

M.

^ permalink raw reply

* Re: parsecvs tool now creates git repositories
From: Jakub Narebski @ 2006-04-03 14:21 UTC (permalink / raw)
  To: git
In-Reply-To: <20060403140348.GE16823@harddisk-recovery.com>

Erik Mouw wrote:

> On Sat, Apr 01, 2006 at 09:36:28PM -0800, Keith Packard wrote:
>> The UI is a total disaster, sufficient for testing. You must create an
>> Authors file in the current directory which looks like the git-cvsimport
>> authors file. You must also have a edit-change-log program in your path
>> which edits the commit message in place. /bin/true will work if you
>> don't need to edit the messages.
>> 
>> I should clearly steal the existing git-cvsimport command line arguments
>> and use those.
> 
> What is the current way to use it? I get the impression it reads raw ,v
> files, but how do I get along with a remote CVS repository?

>From the comments on #git, parsecvs reads raw ,v files for creating history
tree, then uses 'cvs co ...' for getting the contents.

If you have access to remote CVS repository, it was suggested to use either
cvsclone or cvsup.

-- 
Jakub Narebski
Warsaw, Poland

^ permalink raw reply

* Re: parsecvs tool now creates git repositories
From: Erik Mouw @ 2006-04-03 14:03 UTC (permalink / raw)
  To: Keith Packard; +Cc: Git Mailing List
In-Reply-To: <1143956188.2303.39.camel@neko.keithp.com>

On Sat, Apr 01, 2006 at 09:36:28PM -0800, Keith Packard wrote:
> The UI is a total disaster, sufficient for testing. You must create an
> Authors file in the current directory which looks like the git-cvsimport
> authors file. You must also have a edit-change-log program in your path
> which edits the commit message in place. /bin/true will work if you
> don't need to edit the messages.
> 
> I should clearly steal the existing git-cvsimport command line arguments
> and use those.

What is the current way to use it? I get the impression it reads raw ,v
files, but how do I get along with a remote CVS repository?


Erik

-- 
+-- Erik Mouw -- www.harddisk-recovery.com -- +31 70 370 12 90 --
| Lab address: Delftechpark 26, 2628 XH, Delft, The Netherlands

^ permalink raw reply

* Re: parsecvs tool now creates git repositories
From: Erik Mouw @ 2006-04-03 13:58 UTC (permalink / raw)
  To: Jan-Benedict Glaw; +Cc: Keith Packard, Git Mailing List
In-Reply-To: <20060403072554.GN1259@lug-owl.de>

On Mon, Apr 03, 2006 at 09:25:54AM +0200, Jan-Benedict Glaw wrote:
> On Sun, 2006-04-02 21:10:56 -0700, Keith Packard <keithp@keithp.com> wrote:
> > I think this is a bug in your version of flex; I'm using standard lex
> > conventions here. I don't know how to make it work for you.
> 
> It compiles for me with this patch (thanks to Linus for the hint):
> 
> diff --git a/Makefile b/Makefile

[...]

> Would you please verify that it doesn't break things for you?

Almost there. I applied your patch and ran "make clean", but the
Makefile forgets to remove lex.c. Here's an updated patch:

diff --git a/Makefile b/Makefile
index 639353a..5651e70 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,8 @@ GCC_WARNINGS2=-Wmissing-prototypes -Wmis
 GCC_WARNINGS3=-Wnested-externs -fno-strict-aliasing
 GCC_WARNINGS=$(GCC_WARNINGS1) $(GCC_WARNINGS2) $(GCC_WARNINGS3)
 CFLAGS=-O0 -g $(GCC_WARNINGS)
-YFLAGS=-d
+YFLAGS=-d -l
+LFLAGS=-l
 
 SRCS=gram.y lex.l cvs.h parsecvs.c cvsutil.c revlist.c atom.c revcvs.c git.c
 
@@ -20,4 +21,4 @@ lex.o: lex.c
 y.tab.h: gram.c
 
 clean:
-	rm -f $(OBJS) y.tab.h gram.c parsecvs
+	rm -f $(OBJS) y.tab.h gram.c lex.c parsecvs



It compiles! Ship it! ;-)


Erik

-- 
+-- Erik Mouw -- www.harddisk-recovery.com -- +31 70 370 12 90 --
| Lab address: Delftechpark 26, 2628 XH, Delft, The Netherlands

^ permalink raw reply related

* Re: Default remote branch for local branch
From: Josef Weidendorfer @ 2006-04-03 13:57 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git
In-Reply-To: <7vr74ff4oy.fsf@assigned-by-dhcp.cox.net>

On Monday 03 April 2006 10:23, Junio C Hamano wrote:
> Josef Weidendorfer <Josef.Weidendorfer@gmx.de> writes:
> 
> > On Sunday 02 April 2006 23:40, you wrote:
> >> > Let me try to understand this: the general idea is that
> >> >
> >> >   pull.origin = [<refspec> of] <remote> for <branch>
> >> >
> >> > specifies the default action of git-pull if we are on <branch>, ie.
> >> > a "git pull" then runs "git pull <remote> [<refspec>]".
> >> 
> >> Not quite.
> >> 
> >> It will be (if this were a serious proposal -- I am not
> >> absolutely convinced this is a good idea) more like "git fetch
> >> <remote>" followed by "git-merge HEAD the-refspec-named-there".
> >
> > So it is not really a <refspec>, but a <localbranch> which has to
> > appear in the .git/remotes file on the right side of a refspec on
> > a Pull line.
> 
> No, I meant <refspec> not <localbranch> here, because I do not
> want to force people to have tracking local branch.

Oh, ok. I just had to look up the manual regarding refspecs when
pulling/fetching again: "<ref>" is the same as "<ref>:" and does
not create/use any local branch.

> > All I wanted to remark is, that, with
> >
> >  URL: <remote-URL>
> >  Pull: refs/head/master:refs/head/remote1
> >  Pull: refs/head/other:refs/head/remote2
> >
> > the config
> >
> >  pull.origin = <remote> for refs/head/my-devel-for-remote2
> >
> > which does not use the [<refspec> of] part, always is bogus:
> > We get remote1 merged into my-devel-for-remote2 on a git-pull,
> > which is not what we want.
> 
> I think we are on the same page, if you just think of not having
> [<refspec> of] a short-hand for naming the first Pull: line.

Yes, I understand this. But I do not think that such an optional
shortcut is useful for config files entries. It complicates parsing/editors,
and seems to make it more confusing. Despite, I agree that such optional
shortcuts are nice for porcelain command lines.

As I see the use of "<refspec> of <remote>", I still think that specifying
a local branch is useful, too. So what about

  pull.origin = (<refspec> of <remote> | <localbranch>) for <branch>	

instead? We can distinguish the two cases by looking for the "of".
When the <refspec> is of the form <src>:<dst> (where <dst> is the local
branch), we probably want to sanity check against .git/remotes/.

> > Optionally, branching <new> off from <old> could add <new> as
> > topic branch of <old>: Thus, if you are on <old> and do git-pull,
> > you get <new> merged in.
> 
> I agree with Andreas on this part.

Yup. If we want to support topic branches with defaults, we should use
another config option. And now, I am not convinced about the usefulness
of this any more.

> Not if you made the [<refspec> of] part <localbranch>.  Then
> this configuration for default merge source per local branch
> feature is available only to people who are willing to use
> tracking branches.

Yes, I understand this now.

> I just wanted to mention that it would be handy to be able to
> take snapshots of tracking branch heads, but it does not really
> matter whether they are "your" local development branches or
> tracking branches.  Just a nightly or on-demand
> 
>          d="$GIT_DIR/refs/snapshot/`date '+%Y-%m-%d'`" &&
>          mkdir "$GIT_DIR/refs/snapshot/$d" &&
>          tar Ccf "$GIT_DIR/refs/" - heads |
>          tar Cxf "$d" 

Wow. For this, versioning of the /refs directory as subproject
would be good ;-)

Josef

^ permalink raw reply

* Re: Cygwin can't handle huge packfiles?
From: Johannes Schindelin @ 2006-04-03 13:23 UTC (permalink / raw)
  To: Kees-Jan Dijkzeul; +Cc: git
In-Reply-To: <fa0b6e200604030246q21fccb9ar93004ac67d8b28b3@mail.gmail.com>

Hi,

On Mon, 3 Apr 2006, Kees-Jan Dijkzeul wrote:

> I'm trying to get Git to manage a 5Gb source tree. Under linux, this
> works like a charm. Under cygwin, however, I run in to difficulties.
> For example:
> 
> $ git-clone sgp-wa/ sgp-wa.clone
> fatal: packfile
> ./objects/pack/pack-56aa013a0234e198467ed37ae5db925764a6ee98.pack
> cannot be mapped.
> fatal: unexpected EOF
> fetch-pack from '/cygdrive/e/Projects/sgp-wa/.git' failed.
> 
> To figure out what is happening, I printed the value of errno, which
> turns out to be 12 (Cannot allocate memory). I'm not sure how mmap is
> implemented in cygwin, but if they allocate memory and load the file
> into it, then this error is not surprising, as the pack file in
> question is 1.5Gb in size.

The problem is not mmap() on cygwin, but that a fork() has to jump through 
loops to reinstall the open file descriptors on cygwin. If the 
corresponding file was deleted, that fails. Therefore, we work around that 
on cygwin by actually reading the file into memory, *not* mmap()ing it.

Hth,
Dscho

^ permalink raw reply

* Re: Default remote branch for local branch
From: Andreas Ericsson @ 2006-04-03 10:03 UTC (permalink / raw)
  To: Josef Weidendorfer; +Cc: Junio C Hamano, git
In-Reply-To: <200604031138.29227.Josef.Weidendorfer@gmx.de>

Josef Weidendorfer wrote:
> On Monday 03 April 2006 09:56, Andreas Ericsson wrote:
> 
>>Josef Weidendorfer wrote:
>>
>>>Optionally, branching <new> off from <old> could add <new> as
>>>topic branch of <old>: Thus, if you are on <old> and do git-pull,
>>>you get <new> merged in.
>>>
>>
>>This is clearly insane. If I'm on <old> and want to sync with my 
>>upstream source that would be impossible without explicitly telling it 
>>*not* to merge with <new>. Iow, this change would (possibly) simplify 
>>for the one repo maintainer, but make things harder for the 30-odd 
>>developers.
> 
> 
> Yes.
> Therefore I put "optionally" above. But you are right, mixing up
> "merge upstream" and "merge downstream" into one config option is insane.
> 
> Some idea independent but related:
> I still think it is a better UI of a porcelain to try to note metainfo
> automatically, ie. storing somewhere that we branched one off another.
> What about adding "branch.topic" config option for this?
> "git-branch -t newtopic"/"git-checkout -b newtopic -t"
> would create a new topic branch, which is remembered in "branch.topic",
> and "git-pull -t" merges these topic branches?
> To specify that a remote branch is a topic branch of a given local
> branch (to be pulled into with "git-pull -t"), we could add
> "git-branch --add-topic <refspec>|<remoteURL>".
> 

Sorry, but I still don't see the use of it. Usually, some topics mature 
faster than others, meaning I'd still have to do the old "git pull . 
this and that", leaving "the-other-one" to soak a bit longer. What 
you're suggesting would make the odd case easier while adding nothing 
for the normal flow.

For archeological purposes it might make some sense to record what the 
branch was named that you forked from, but to me it's more interesting 
to see which state the code was in when the code forked, and this is 
discernable by the merge-base command.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply

* Cygwin can't handle huge packfiles?
From: Kees-Jan Dijkzeul @ 2006-04-03  9:46 UTC (permalink / raw)
  To: git

Hi,

I'm trying to get Git to manage a 5Gb source tree. Under linux, this
works like a charm. Under cygwin, however, I run in to difficulties.
For example:

$ git-clone sgp-wa/ sgp-wa.clone
fatal: packfile
./objects/pack/pack-56aa013a0234e198467ed37ae5db925764a6ee98.pack
cannot be mapped.
fatal: unexpected EOF
fetch-pack from '/cygdrive/e/Projects/sgp-wa/.git' failed.

To figure out what is happening, I printed the value of errno, which
turns out to be 12 (Cannot allocate memory). I'm not sure how mmap is
implemented in cygwin, but if they allocate memory and load the file
into it, then this error is not surprising, as the pack file in
question is 1.5Gb in size.

I'm not sure how to approach this problem. Any tips would be greatly
appreciated.

Thanks a lot!

Kees-Jan

^ permalink raw reply

* Re: Default remote branch for local branch
From: Josef Weidendorfer @ 2006-04-03  9:38 UTC (permalink / raw)
  To: Andreas Ericsson; +Cc: Junio C Hamano, git
In-Reply-To: <4430D517.5020205@op5.se>

On Monday 03 April 2006 09:56, Andreas Ericsson wrote:
> Josef Weidendorfer wrote:
> > 
> > Optionally, branching <new> off from <old> could add <new> as
> > topic branch of <old>: Thus, if you are on <old> and do git-pull,
> > you get <new> merged in.
> > 
> 
> This is clearly insane. If I'm on <old> and want to sync with my 
> upstream source that would be impossible without explicitly telling it 
> *not* to merge with <new>. Iow, this change would (possibly) simplify 
> for the one repo maintainer, but make things harder for the 30-odd 
> developers.

Yes.
Therefore I put "optionally" above. But you are right, mixing up
"merge upstream" and "merge downstream" into one config option is insane.

Some idea independent but related:
I still think it is a better UI of a porcelain to try to note metainfo
automatically, ie. storing somewhere that we branched one off another.
What about adding "branch.topic" config option for this?
"git-branch -t newtopic"/"git-checkout -b newtopic -t"
would create a new topic branch, which is remembered in "branch.topic",
and "git-pull -t" merges these topic branches?
To specify that a remote branch is a topic branch of a given local
branch (to be pulled into with "git-pull -t"), we could add
"git-branch --add-topic <refspec>|<remoteURL>".

Josef

^ permalink raw reply

* Re: Moving a file back to an earlier revision.
From: Karl Hasselström @ 2006-04-03  8:29 UTC (permalink / raw)
  To: David Ho; +Cc: Junio C Hamano, git
In-Reply-To: <4dd15d180603311421w7b48cfa4y80d3f9d565d4276f@mail.gmail.com>

On 2006-03-31 17:21:48 -0500, David Ho wrote:
[quoting junkio]

> > Remember, a branch in git is very cheap, and is a powerful way to
> > keep track of things while you decide which alternate universe to
> > take. And even after you decide, you could always look at and even
> > build on the other universe.
>
> I feel embarrassed to say this but in my branch there are commits to
> the driver and other commits for the board so it looks more like
>
> ---0---D1---B1---B2---D2---B3---B4---
>
> D* - driver changes
> B* - board changes
>
> So to go back to the 0 state I lose my board changes. But I hope
> what I did (in my reply to Linus) is very close to your idea of
> having separate branches.

You could use either stgit or the cherry-picking stuff in git to
create a new branch with these commits, but reordered any way you
like. (Or, given what you want to use it for, maybe two separate topic
branches -- one for board changes and one for driver changes -- that
can be merged to produce the end result.)

-- 
Karl Hasselström, kha@treskal.com
      www.treskal.com/kalle

^ permalink raw reply

* Re: Default remote branch for local branch
From: Junio C Hamano @ 2006-04-03  8:23 UTC (permalink / raw)
  To: Josef Weidendorfer; +Cc: git
In-Reply-To: <200604030128.42680.Josef.Weidendorfer@gmx.de>

Josef Weidendorfer <Josef.Weidendorfer@gmx.de> writes:

> On Sunday 02 April 2006 23:40, you wrote:
>> > Let me try to understand this: the general idea is that
>> >
>> >   pull.origin = [<refspec> of] <remote> for <branch>
>> >
>> > specifies the default action of git-pull if we are on <branch>, ie.
>> > a "git pull" then runs "git pull <remote> [<refspec>]".
>> 
>> Not quite.
>> 
>> It will be (if this were a serious proposal -- I am not
>> absolutely convinced this is a good idea) more like "git fetch
>> <remote>" followed by "git-merge HEAD the-refspec-named-there".
>
> So it is not really a <refspec>, but a <localbranch> which has to
> appear in the .git/remotes file on the right side of a refspec on
> a Pull line.

No, I meant <refspec> not <localbranch> here, because I do not
want to force people to have tracking local branch.  The call to
merge in pull that happens immediately after fetch does not use
localbranch, but uses what's recorded in FETCH_HEAD, and if we
were to do this change the above change would most likely to
affect how each remote branch heads fetched are marked for (or
not for) merges.

> All I wanted to remark is, that, with
>
>  URL: <remote-URL>
>  Pull: refs/head/master:refs/head/remote1
>  Pull: refs/head/other:refs/head/remote2
>
> the config
>
>  pull.origin = <remote> for refs/head/my-devel-for-remote2
>
> which does not use the [<refspec> of] part, always is bogus:
> We get remote1 merged into my-devel-for-remote2 on a git-pull,
> which is not what we want.

I think we are on the same page, if you just think of not having
[<refspec> of] a short-hand for naming the first Pull: line.

> Optionally, branching <new> off from <old> could add <new> as
> topic branch of <old>: Thus, if you are on <old> and do git-pull,
> you get <new> merged in.

I agree with Andreas on this part.

>> A bigger thing is that I am trying to avoid _requiring_ tracking
>> branches.
>
> I don't think you force anything when you add functionality to git-pull
> for the config discussed here. Nobody *has* to use this config - it's
> a porcelain thingie.

Not if you made the [<refspec> of] part <localbranch>.  Then
this configuration for default merge source per local branch
feature is available only to people who are willing to use
tracking branches.

>> If you are not micromanaging your subsystem 
>> maintainers, you should not have to care where they were the
>> last time you pulled from them....
>> [...why not needing to use tracking branches is good...]

>> If we are going require tracking branches,
>
> I do not understand. Why should we require this?

I forgot to say "on the other hand" before that "If".

>> we could do a bit 
>> more with them, like remembering where the tip was when we
>> fetched the last time (or the time before that...) and diff with
>> that, but the tracking branch heads are not set up to do things
>> like that right now -- they are single pointers.

Actually I think this part is independent and I did not have to
mention.  There is no point complicating only the tracking
branches at all.

I just wanted to mention that it would be handy to be able to
take snapshots of tracking branch heads, but it does not really
matter whether they are "your" local development branches or
tracking branches.  Just a nightly or on-demand

         d="$GIT_DIR/refs/snapshot/`date '+%Y-%m-%d'`" &&
         mkdir "$GIT_DIR/refs/snapshot/$d" &&
         tar Ccf "$GIT_DIR/refs/" - heads |
         tar Cxf "$d" -

would do, and then you should be able to ask "where was my
'master' head yesterday?".

^ permalink raw reply

* Re: Default remote branch for local branch
From: Andreas Ericsson @ 2006-04-03  7:56 UTC (permalink / raw)
  To: Josef Weidendorfer; +Cc: Junio C Hamano, git
In-Reply-To: <200604030128.42680.Josef.Weidendorfer@gmx.de>

Josef Weidendorfer wrote:
> 
> Optionally, branching <new> off from <old> could add <new> as
> topic branch of <old>: Thus, if you are on <old> and do git-pull,
> you get <new> merged in.
> 

This is clearly insane. If I'm on <old> and want to sync with my 
upstream source that would be impossible without explicitly telling it 
*not* to merge with <new>. Iow, this change would (possibly) simplify 
for the one repo maintainer, but make things harder for the 30-odd 
developers.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

^ permalink raw reply

* n-heads and patch dependency chains
From: Sam Vilain @ 2006-04-03  7:48 UTC (permalink / raw)
  To: Git Mailing List

Hi all,

"Patch dependency chains", the best plain-English term we could find for
the scary sounding darcs term "patch calculus", are said by some to be a
very good reason to use a system like darcs, even to some its
fundamental advantage over systems such as git.

The question is, to what extent is this point true?

To get a feel for the issues involved, I tried applying an example
n-head and patch dependency logic to a "real" project that had a small
enough number of commits to make it feasible to construct everything
manually.

A side-by-side comparison of the commit history with n-head commits
(left pane) and without (right pane) is here:

  http://utsl.gen.nz/git/hydra-vs-regular.png

Some important points to note:

  0. gitk is re-ordering the commits to try to make the graph tidy,
     so don't worry that the commits are in a different order

  1. commits were considered dependent on the last commit(s) that
     delivered the file(s) they modified.  As in, the user specified
     that extra heads be created wherever possible, including
     splitting heads.

  2. the "rolling" n-head was continually discarded.  "n-head" is
     misspelt :) as "hydra-head" in the diagrams.

  3. patches that just added new files were performed by first
     making a new repository, adding those files and committing.
     this is why there are a lot of "starting points" on the tree

I think we can conclude from this:

  - this is not impossible using the current model, and some extra
    useful information can be seen in the tree that shows more real
    dependency information and relationships between individual commits

  - doing automatic n-head creation would probably be madness, as
    far too many useless heads are created (though it is almost
    guaranteed that supporting 'patch commuting' a la darcs would
    make this *even worse* as it would mean that you could potentially
    have even more heads)

  - the current tools make this style of development difficult.

The bugs:

  - git-merge-octopus isn't capable of merging commits where there is
    no common commit, but none was needed as the commits' trees don't
    overlap.

    That is, with "git-pull -s octopus . head1 head2 head3 ..."
    you get the error:

      Unable to find common commit with 42f49cc...

    But pulling the branches individually works fine;

      Merging HEAD with 42f49cc...
      Merging:
        c0805...
        42f49...
      found 1 common ancestor(s):
      1 virtual commit

    That 'branch' was created by setting up a new git repo in another
    path, then using 'git-fetch' to pull it into the local one.

  - for some reason I had to list "-s ours" twice to git-pull when
    manually making the octopus merge nodes

  - `git-pull --no-commit -a' suffers from the same problem

  - some tools (such as the diff window in gitk) produce *very*
    strange output if you try to merge the heads and apply the next
    patch in the same go.

The open questions:

  - would it make a difference if this automatic patch dependency
    information was stored using a different type of relationship?

  - would this be more useful if the initial n-head creation was more
    manual, like topic branches?  And if it did work like this, would
    an n-head pull feature enable the 'pu' development model to work
    seamlessly?

  - how useful are the other benefits of dependent commits?

The IRC log:

17:45 < mugwump> the other suggestions look quite good.  I don't know
                 how I got roped into spending a whole day on this :)
17:46 < mugwump> oh yeah, I remember now.  somebody asked for a
                 comparison between darcs and git
17:46  * ShadeHawk whistles innocently

So there we go, anyway.  If some form of patch dependency system is to
be included in git, then I hope this message helps to explain the
practical problems and give the would-be author a good head start :)

Hot potato into the aether, anyone?

Sam.

^ permalink raw reply

* Re: [RFH] xdiff shows trivially redundant diff.
From: Junio C Hamano @ 2006-04-03  7:33 UTC (permalink / raw)
  To: Davide Libenzi; +Cc: git
In-Reply-To: <Pine.LNX.4.64.0604022124440.10401@alien.or.mcafeemobile.com>

[-- Attachment #1: Type: text/plain, Size: 1227 bytes --]

Davide Libenzi <davidel@xmailserver.org> writes:

> No problem. That's only an eye-issue though, since the diff is still a
> valid diff according to its definition where D=A-B => B+D==A && A-D==B
> From the day I released 0.18, xregression is continuosly running w/out
> any issue. I'll check it out though ...

There is another to report, when ctxlen == 0.

Between the attached files "diff -u0 8f352aa dd40a03", the 
header for a hunk with only inserted lines misidentify the
original location.

For example, the first hunk says:

	@@ -0,0 +6 @@
        +#include "diff.h"

Which is inconsistent with what GNU diff says:

	@@ -5,0 +6 @@
        +#include "diff.h"

I've tried this patch but it is not right; the diff between the
attached two files show a 47-line hunk that inserts at line 400,
then the next 6-line hunk inserts at line 401 which is obviously
bogus.

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index afaada1..3e7f999 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -244,7 +244,7 @@ int xdl_emit_hunk_hdr(long s1, long c1, 
 	memcpy(buf, "@@ -", 4);
 	nb += 4;
 
-	nb += xdl_num_out(buf + nb, c1 ? s1: 0);
+	nb += xdl_num_out(buf + nb, c1 ? s1 : (s1-1));
 
 	if (c1 != 1) {
 		memcpy(buf + nb, ",", 1);


[-- Attachment #2: file 8f352aa --]
[-- Type: text/plain, Size: 24358 bytes --]

#include "cache.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
#include <sys/time.h>
#include <signal.h>

static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";

struct object_entry {
	unsigned char sha1[20];
	unsigned long size;	/* uncompressed size */
	unsigned long offset;	/* offset into the final pack file;
				 * nonzero if already written.
				 */
	unsigned int depth;	/* delta depth */
	unsigned int delta_limit;	/* base adjustment for in-pack delta */
	unsigned int hash;	/* name hint hash */
	enum object_type type;
	enum object_type in_pack_type;	/* could be delta */
	unsigned long delta_size;	/* delta data size (uncompressed) */
	struct object_entry *delta;	/* delta base object */
	struct packed_git *in_pack; 	/* already in pack */
	unsigned int in_pack_offset;
	struct object_entry *delta_child; /* delitified objects who bases me */
	struct object_entry *delta_sibling; /* other deltified objects who
					     * uses the same base as me
					     */
};

/*
 * Objects we are going to pack are colected in objects array (dynamically
 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
 * in the order we see -- typically rev-list --objects order that gives us
 * nice "minimum seek" order.
 *
 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
 * elements in the objects array.  The former is used to build the pack
 * index (lists object names in the ascending order to help offset lookup),
 * and the latter is used to group similar things together by try_delta()
 * heuristics.
 */

static unsigned char object_list_sha1[20];
static int non_empty = 0;
static int no_reuse_delta = 0;
static int local = 0;
static int incremental = 0;
static struct object_entry **sorted_by_sha, **sorted_by_type;
static struct object_entry *objects = NULL;
static int nr_objects = 0, nr_alloc = 0;
static const char *base_name;
static unsigned char pack_file_sha1[20];
static int progress = 1;
static volatile int progress_update = 0;

/*
 * The object names in objects array are hashed with this hashtable,
 * to help looking up the entry by object name.  Binary search from
 * sorted_by_sha is also possible but this was easier to code and faster.
 * This hashtable is built after all the objects are seen.
 */
static int *object_ix = NULL;
static int object_ix_hashsz = 0;

/*
 * Pack index for existing packs give us easy access to the offsets into
 * corresponding pack file where each object's data starts, but the entries
 * do not store the size of the compressed representation (uncompressed
 * size is easily available by examining the pack entry header).  We build
 * a hashtable of existing packs (pack_revindex), and keep reverse index
 * here -- pack index file is sorted by object name mapping to offset; this
 * pack_revindex[].revindex array is an ordered list of offsets, so if you
 * know the offset of an object, next offset is where its packed
 * representation ends.
 */
struct pack_revindex {
	struct packed_git *p;
	unsigned long *revindex;
} *pack_revindex = NULL;
static int pack_revindex_hashsz = 0;

/*
 * stats
 */
static int written = 0;
static int written_delta = 0;
static int reused = 0;
static int reused_delta = 0;

static int pack_revindex_ix(struct packed_git *p)
{
	unsigned int ui = (unsigned int) p;
	int i;

	ui = ui ^ (ui >> 16); /* defeat structure alignment */
	i = (int)(ui % pack_revindex_hashsz);
	while (pack_revindex[i].p) {
		if (pack_revindex[i].p == p)
			return i;
		if (++i == pack_revindex_hashsz)
			i = 0;
	}
	return -1 - i;
}

static void prepare_pack_ix(void)
{
	int num;
	struct packed_git *p;
	for (num = 0, p = packed_git; p; p = p->next)
		num++;
	if (!num)
		return;
	pack_revindex_hashsz = num * 11;
	pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
	for (p = packed_git; p; p = p->next) {
		num = pack_revindex_ix(p);
		num = - 1 - num;
		pack_revindex[num].p = p;
	}
	/* revindex elements are lazily initialized */
}

static int cmp_offset(const void *a_, const void *b_)
{
	unsigned long a = *(unsigned long *) a_;
	unsigned long b = *(unsigned long *) b_;
	if (a < b)
		return -1;
	else if (a == b)
		return 0;
	else
		return 1;
}

/*
 * Ordered list of offsets of objects in the pack.
 */
static void prepare_pack_revindex(struct pack_revindex *rix)
{
	struct packed_git *p = rix->p;
	int num_ent = num_packed_objects(p);
	int i;
	void *index = p->index_base + 256;

	rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
	for (i = 0; i < num_ent; i++) {
		long hl = *((long *)(index + 24 * i));
		rix->revindex[i] = ntohl(hl);
	}
	/* This knows the pack format -- the 20-byte trailer
	 * follows immediately after the last object data.
	 */
	rix->revindex[num_ent] = p->pack_size - 20;
	qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
}

static unsigned long find_packed_object_size(struct packed_git *p,
					     unsigned long ofs)
{
	int num;
	int lo, hi;
	struct pack_revindex *rix;
	unsigned long *revindex;
	num = pack_revindex_ix(p);
	if (num < 0)
		die("internal error: pack revindex uninitialized");
	rix = &pack_revindex[num];
	if (!rix->revindex)
		prepare_pack_revindex(rix);
	revindex = rix->revindex;
	lo = 0;
	hi = num_packed_objects(p) + 1;
	do {
		int mi = (lo + hi) / 2;
		if (revindex[mi] == ofs) {
			return revindex[mi+1] - ofs;
		}
		else if (ofs < revindex[mi])
			hi = mi;
		else
			lo = mi + 1;
	} while (lo < hi);
	die("internal error: pack revindex corrupt");
}

static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
	unsigned long othersize, delta_size;
	char type[10];
	void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
	void *delta_buf;

	if (!otherbuf)
		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
        delta_buf = diff_delta(otherbuf, othersize,
			       buf, size, &delta_size, 0);
        if (!delta_buf || delta_size != entry->delta_size)
        	die("delta size changed");
        free(buf);
        free(otherbuf);
	return delta_buf;
}

/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size", then three bits of "type",
 *    and the high bit is "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
{
	int n = 1;
	unsigned char c;

	if (type < OBJ_COMMIT || type > OBJ_DELTA)
		die("bad type %d", type);

	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
		n++;
	}
	*hdr = c;
	return n;
}

static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
{
	unsigned long size;
	char type[10];
	void *buf;
	unsigned char header[10];
	unsigned hdrlen, datalen;
	enum object_type obj_type;
	int to_reuse = 0;

	obj_type = entry->type;
	if (! entry->in_pack)
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (obj_type == OBJ_DELTA)
		to_reuse = 1;	/* check_object() decided it for us */
	else if (obj_type != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (! to_reuse) {
		buf = read_sha1_file(entry->sha1, type, &size);
		if (!buf)
			die("unable to read %s", sha1_to_hex(entry->sha1));
		if (size != entry->size)
			die("object %s size inconsistency (%lu vs %lu)",
			    sha1_to_hex(entry->sha1), size, entry->size);
		if (entry->delta) {
			buf = delta_against(buf, size, entry);
			size = entry->delta_size;
			obj_type = OBJ_DELTA;
		}
		/*
		 * The object header is a byte of 'type' followed by zero or
		 * more bytes of length.  For deltas, the 20 bytes of delta
		 * sha1 follows that.
		 */
		hdrlen = encode_header(obj_type, size, header);
		sha1write(f, header, hdrlen);

		if (entry->delta) {
			sha1write(f, entry->delta, 20);
			hdrlen += 20;
		}
		datalen = sha1write_compressed(f, buf, size);
		free(buf);
	}
	else {
		struct packed_git *p = entry->in_pack;
		use_packed_git(p);

		datalen = find_packed_object_size(p, entry->in_pack_offset);
		buf = p->pack_base + entry->in_pack_offset;
		sha1write(f, buf, datalen);
		unuse_packed_git(p);
		hdrlen = 0; /* not really */
		if (obj_type == OBJ_DELTA)
			reused_delta++;
		reused++;
	}
	if (obj_type == OBJ_DELTA)
		written_delta++;
	written++;
	return hdrlen + datalen;
}

static unsigned long write_one(struct sha1file *f,
			       struct object_entry *e,
			       unsigned long offset)
{
	if (e->offset)
		/* offset starts from header size and cannot be zero
		 * if it is written already.
		 */
		return offset;
	e->offset = offset;
	offset += write_object(f, e);
	/* if we are deltified, write out its base object. */
	if (e->delta)
		offset = write_one(f, e->delta, offset);
	return offset;
}

static void write_pack_file(void)
{
	int i;
	struct sha1file *f;
	unsigned long offset;
	struct pack_header hdr;
	unsigned last_percent = 999;
	int do_progress = 0;

	if (!base_name)
		f = sha1fd(1, "<stdout>");
	else {
		f = sha1create("%s-%s.%s", base_name,
			       sha1_to_hex(object_list_sha1), "pack");
		do_progress = progress;
	}
	if (do_progress)
		fprintf(stderr, "Writing %d objects.\n", nr_objects);

	hdr.hdr_signature = htonl(PACK_SIGNATURE);
	hdr.hdr_version = htonl(PACK_VERSION);
	hdr.hdr_entries = htonl(nr_objects);
	sha1write(f, &hdr, sizeof(hdr));
	offset = sizeof(hdr);
	for (i = 0; i < nr_objects; i++) {
		offset = write_one(f, objects + i, offset);
		if (do_progress) {
			unsigned percent = written * 100 / nr_objects;
			if (progress_update || percent != last_percent) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
					percent, written, nr_objects);
				progress_update = 0;
				last_percent = percent;
			}
		}
	}
	if (do_progress)
		fputc('\n', stderr);

	sha1close(f, pack_file_sha1, 1);
}

static void write_index_file(void)
{
	int i;
	struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx");
	struct object_entry **list = sorted_by_sha;
	struct object_entry **last = list + nr_objects;
	unsigned int array[256];

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *entry = *next;
			if (entry->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
	sha1write(f, array, 256 * sizeof(int));

	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *entry = *list++;
		unsigned int offset = htonl(entry->offset);
		sha1write(f, &offset, 4);
		sha1write(f, entry->sha1, 20);
	}
	sha1write(f, pack_file_sha1, 20);
	sha1close(f, NULL, 1);
}

static int add_object_entry(unsigned char *sha1, unsigned int hash)
{
	unsigned int idx = nr_objects;
	struct object_entry *entry;
	struct packed_git *p;
	unsigned int found_offset = 0;
	struct packed_git *found_pack = NULL;

	for (p = packed_git; p; p = p->next) {
		struct pack_entry e;
		if (find_pack_entry_one(sha1, &e, p)) {
			if (incremental)
				return 0;
			if (local && !p->pack_local)
				return 0;
			if (!found_pack) {
				found_offset = e.offset;
				found_pack = e.p;
			}
		}
	}

	if (idx >= nr_alloc) {
		unsigned int needed = (idx + 1024) * 3 / 2;
		objects = xrealloc(objects, needed * sizeof(*entry));
		nr_alloc = needed;
	}
	entry = objects + idx;
	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);
	entry->hash = hash;
	if (found_pack) {
		entry->in_pack = found_pack;
		entry->in_pack_offset = found_offset;
	}
	nr_objects = idx+1;
	return 1;
}

static int locate_object_entry_hash(unsigned char *sha1)
{
	int i;
	unsigned int ui;
	memcpy(&ui, sha1, sizeof(unsigned int));
	i = ui % object_ix_hashsz;
	while (0 < object_ix[i]) {
		if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
			return i;
		if (++i == object_ix_hashsz)
			i = 0;
	}
	return -1 - i;
}

static struct object_entry *locate_object_entry(unsigned char *sha1)
{
	int i = locate_object_entry_hash(sha1);
	if (0 <= i)
		return &objects[object_ix[i]-1];
	return NULL;
}

static void check_object(struct object_entry *entry)
{
	char type[20];

	if (entry->in_pack) {
		unsigned char base[20];
		unsigned long size;
		struct object_entry *base_entry;

		/* We want in_pack_type even if we do not reuse delta.
		 * There is no point not reusing non-delta representations.
		 */
		check_reuse_pack_delta(entry->in_pack,
				       entry->in_pack_offset,
				       base, &size,
				       &entry->in_pack_type);

		/* Check if it is delta, and the base is also an object
		 * we are going to pack.  If so we will reuse the existing
		 * delta.
		 */
		if (!no_reuse_delta &&
		    entry->in_pack_type == OBJ_DELTA &&
		    (base_entry = locate_object_entry(base))) {

			/* Depth value does not matter - find_deltas()
			 * will never consider reused delta as the
			 * base object to deltify other objects
			 * against, in order to avoid circular deltas.
			 */

			/* uncompressed size of the delta data */
			entry->size = entry->delta_size = size;
			entry->delta = base_entry;
			entry->type = OBJ_DELTA;

			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;

			return;
		}
		/* Otherwise we would do the usual */
	}

	if (sha1_object_info(entry->sha1, type, &entry->size))
		die("unable to get type of object %s",
		    sha1_to_hex(entry->sha1));

	if (!strcmp(type, "commit")) {
		entry->type = OBJ_COMMIT;
	} else if (!strcmp(type, "tree")) {
		entry->type = OBJ_TREE;
	} else if (!strcmp(type, "blob")) {
		entry->type = OBJ_BLOB;
	} else if (!strcmp(type, "tag")) {
		entry->type = OBJ_TAG;
	} else
		die("unable to pack object %s of type %s",
		    sha1_to_hex(entry->sha1), type);
}

static void hash_objects(void)
{
	int i;
	struct object_entry *oe;

	object_ix_hashsz = nr_objects * 2;
	object_ix = xcalloc(sizeof(int), object_ix_hashsz);
	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
		int ix = locate_object_entry_hash(oe->sha1);
		if (0 <= ix) {
			error("the same object '%s' added twice",
			      sha1_to_hex(oe->sha1));
			continue;
		}
		ix = -1 - ix;
		object_ix[ix] = i + 1;
	}
}

static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
}

static void get_object_details(void)
{
	int i;
	struct object_entry *entry;

	hash_objects();
	prepare_pack_ix();
	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
		check_object(entry);
	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
		if (!entry->delta && entry->delta_child)
			entry->delta_limit =
				check_delta_limit(entry, 1);
}

typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

static entry_sort_t current_sort;

static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
	return current_sort(a,b);
}

static struct object_entry **create_sorted_list(entry_sort_t sort)
{
	struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
	int i;

	for (i = 0; i < nr_objects; i++)
		list[i] = objects + i;
	current_sort = sort;
	qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
	return list;
}

static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
{
	return memcmp(a->sha1, b->sha1, 20);
}

static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
{
	if (a->type < b->type)
		return -1;
	if (a->type > b->type)
		return 1;
	if (a->hash < b->hash)
		return -1;
	if (a->hash > b->hash)
		return 1;
	if (a->size < b->size)
		return -1;
	if (a->size > b->size)
		return 1;
	return a < b ? -1 : (a > b);
}

struct unpacked {
	struct object_entry *entry;
	void *data;
};

/*
 * We search for deltas _backwards_ in a list sorted by type and
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller - deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.
 */
static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
{
	struct object_entry *cur_entry = cur->entry;
	struct object_entry *old_entry = old->entry;
	unsigned long size, oldsize, delta_size, sizediff;
	long max_size;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (cur_entry->type != old_entry->type)
		return -1;

	/* If the current object is at edge, take the depth the objects
	 * that depend on the current object into account -- otherwise
	 * they would become too deep.
	 */
	if (cur_entry->delta_child) {
		if (max_depth <= cur_entry->delta_limit)
			return 0;
		max_depth -= cur_entry->delta_limit;
	}

	size = cur_entry->size;
	if (size < 50)
		return -1;
	oldsize = old_entry->size;
	sizediff = oldsize > size ? oldsize - size : size - oldsize;
	if (sizediff > size / 8)
		return -1;
	if (old_entry->depth >= max_depth)
		return 0;

	/*
	 * NOTE!
	 *
	 * We always delta from the bigger to the smaller, since that's
	 * more space-efficient (deletes don't have to say _what_ they
	 * delete).
	 */
	max_size = size / 2 - 20;
	if (cur_entry->delta)
		max_size = cur_entry->delta_size-1;
	if (sizediff >= max_size)
		return -1;
	delta_buf = diff_delta(old->data, oldsize,
			       cur->data, size, &delta_size, max_size);
	if (!delta_buf)
		return 0;
	cur_entry->delta = old_entry;
	cur_entry->delta_size = delta_size;
	cur_entry->depth = old_entry->depth + 1;
	free(delta_buf);
	return 0;
}

static void progress_interval(int signum)
{
	signal(SIGALRM, progress_interval);
	progress_update = 1;
}

static void find_deltas(struct object_entry **list, int window, int depth)
{
	int i, idx;
	unsigned int array_size = window * sizeof(struct unpacked);
	struct unpacked *array = xmalloc(array_size);
	unsigned processed = 0;
	unsigned last_percent = 999;

	memset(array, 0, array_size);
	i = nr_objects;
	idx = 0;
	if (progress)
		fprintf(stderr, "Deltifying %d objects.\n", nr_objects);

	while (--i >= 0) {
		struct object_entry *entry = list[i];
		struct unpacked *n = array + idx;
		unsigned long size;
		char type[10];
		int j;

		processed++;
		if (progress) {
			unsigned percent = processed * 100 / nr_objects;
			if (percent != last_percent || progress_update) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
					percent, processed, nr_objects);
				progress_update = 0;
				last_percent = percent;
			}
		}

		if (entry->delta)
			/* This happens if we decided to reuse existing
			 * delta from a pack.  "!no_reuse_delta &&" is implied.
			 */
			continue;

		free(n->data);
		n->entry = entry;
		n->data = read_sha1_file(entry->sha1, type, &size);
		if (size != entry->size)
			die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);

		j = window;
		while (--j > 0) {
			unsigned int other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			if (try_delta(n, m, depth) < 0)
				break;
		}
		idx++;
		if (idx >= window)
			idx = 0;
	}

	if (progress)
		fputc('\n', stderr);

	for (i = 0; i < window; ++i)
		free(array[i].data);
	free(array);
}

static void prepare_pack(int window, int depth)
{
	get_object_details();
	sorted_by_type = create_sorted_list(type_size_sort);
	if (window && depth)
		find_deltas(sorted_by_type, window+1, depth);
}

static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
{
	static const char cache[] = "pack-cache/pack-%s.%s";
	char *cached_pack, *cached_idx;
	int ifd, ofd, ifd_ix = -1;

	cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
	ifd = open(cached_pack, O_RDONLY);
	if (ifd < 0)
		return 0;

	if (!pack_to_stdout) {
		cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
		ifd_ix = open(cached_idx, O_RDONLY);
		if (ifd_ix < 0) {
			close(ifd);
			return 0;
		}
	}

	if (progress)
		fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
			sha1_to_hex(sha1));

	if (pack_to_stdout) {
		if (copy_fd(ifd, 1))
			exit(1);
		close(ifd);
	}
	else {
		char name[PATH_MAX];
		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd, ofd))
			exit(1);
		close(ifd);

		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd_ix, ofd))
			exit(1);
		close(ifd_ix);
		puts(sha1_to_hex(sha1));
	}

	return 1;
}

int main(int argc, char **argv)
{
	SHA_CTX ctx;
	char line[PATH_MAX + 20];
	int window = 10, depth = 10, pack_to_stdout = 0;
	struct object_entry **list;
	int i;

	setup_git_directory();

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
			if (!strcmp("--non-empty", arg)) {
				non_empty = 1;
				continue;
			}
			if (!strcmp("--local", arg)) {
				local = 1;
				continue;
			}
			if (!strcmp("--incremental", arg)) {
				incremental = 1;
				continue;
			}
			if (!strncmp("--window=", arg, 9)) {
				char *end;
				window = strtoul(arg+9, &end, 0);
				if (!arg[9] || *end)
					usage(pack_usage);
				continue;
			}
			if (!strncmp("--depth=", arg, 8)) {
				char *end;
				depth = strtoul(arg+8, &end, 0);
				if (!arg[8] || *end)
					usage(pack_usage);
				continue;
			}
			if (!strcmp("-q", arg)) {
				progress = 0;
				continue;
			}
			if (!strcmp("--no-reuse-delta", arg)) {
				no_reuse_delta = 1;
				continue;
			}
			if (!strcmp("--stdout", arg)) {
				pack_to_stdout = 1;
				continue;
			}
			usage(pack_usage);
		}
		if (base_name)
			usage(pack_usage);
		base_name = arg;
	}

	if (pack_to_stdout != !base_name)
		usage(pack_usage);

	prepare_packed_git();

	if (progress) {
		struct itimerval v;
		v.it_interval.tv_sec = 1;
		v.it_interval.tv_usec = 0;
		v.it_value = v.it_interval;
		signal(SIGALRM, progress_interval);
		setitimer(ITIMER_REAL, &v, NULL);
		fprintf(stderr, "Generating pack...\n");
	}

	while (fgets(line, sizeof(line), stdin) != NULL) {
		unsigned int hash;
		char *p;
		unsigned char sha1[20];

		if (progress_update) {
			fprintf(stderr, "Counting objects...%d\r", nr_objects);
			progress_update = 0;
		}
		if (get_sha1_hex(line, sha1))
			die("expected sha1, got garbage:\n %s", line);
		hash = 0;
		p = line+40;
		while (*p) {
			unsigned char c = *p++;
			if (isspace(c))
				continue;
			hash = hash * 11 + c;
		}
		add_object_entry(sha1, hash);
	}
	if (progress)
		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
	if (non_empty && !nr_objects)
		return 0;

	sorted_by_sha = create_sorted_list(sha1_sort);
	SHA1_Init(&ctx);
	list = sorted_by_sha;
	for (i = 0; i < nr_objects; i++) {
		struct object_entry *entry = *list++;
		SHA1_Update(&ctx, entry->sha1, 20);
	}
	SHA1_Final(object_list_sha1, &ctx);

	if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
		;
	else {
		prepare_pack(window, depth);
		if (progress && pack_to_stdout) {
			/* the other end usually displays progress itself */
			struct itimerval v = {{0,},};
			setitimer(ITIMER_REAL, &v, NULL);
			signal(SIGALRM, SIG_IGN );
			progress_update = 0;
		}
		write_pack_file();
		if (!pack_to_stdout) {
			write_index_file();
			puts(sha1_to_hex(object_list_sha1));
		}
	}
	if (progress)
		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
			nr_objects, written, written_delta, reused, reused_delta);
	return 0;
}

[-- Attachment #3: file dd40a03 --]
[-- Type: text/plain, Size: 29571 bytes --]

#include "cache.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
#include "csum-file.h"
#include "diff.h"
#include <sys/time.h>
#include <signal.h>

static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";

struct object_entry {
	unsigned char sha1[20];
	unsigned long size;	/* uncompressed size */
	unsigned long offset;	/* offset into the final pack file;
				 * nonzero if already written.
				 */
	unsigned int depth;	/* delta depth */
	unsigned int delta_limit;	/* base adjustment for in-pack delta */
	unsigned int hash;	/* name hint hash */
	enum object_type type;
	enum object_type in_pack_type;	/* could be delta */
	unsigned long delta_size;	/* delta data size (uncompressed) */
	struct object_entry *delta;	/* delta base object */
	struct packed_git *in_pack; 	/* already in pack */
	unsigned int in_pack_offset;
	struct object_entry *delta_child; /* delitified objects who bases me */
	struct object_entry *delta_sibling; /* other deltified objects who
					     * uses the same base as me
					     */
	int preferred_base;	/* we do not pack this, but is encouraged to
				 * be used as the base objectto delta huge
				 * objects against.
				 */
	int based_on_preferred;	/* current delta candidate is a preferred
				 * one, or delta against a preferred one.
				 */
};

/*
 * Objects we are going to pack are colected in objects array (dynamically
 * expanded).  nr_objects & nr_alloc controls this array.  They are stored
 * in the order we see -- typically rev-list --objects order that gives us
 * nice "minimum seek" order.
 *
 * sorted-by-sha ans sorted-by-type are arrays of pointers that point at
 * elements in the objects array.  The former is used to build the pack
 * index (lists object names in the ascending order to help offset lookup),
 * and the latter is used to group similar things together by try_delta()
 * heuristics.
 */

static unsigned char object_list_sha1[20];
static int non_empty = 0;
static int no_reuse_delta = 0;
static int local = 0;
static int incremental = 0;
static struct object_entry **sorted_by_sha, **sorted_by_type;
static struct object_entry *objects = NULL;
static int nr_objects = 0, nr_alloc = 0, nr_result = 0;
static const char *base_name;
static unsigned char pack_file_sha1[20];
static int progress = 1;
static volatile int progress_update = 0;

/*
 * The object names in objects array are hashed with this hashtable,
 * to help looking up the entry by object name.  Binary search from
 * sorted_by_sha is also possible but this was easier to code and faster.
 * This hashtable is built after all the objects are seen.
 */
static int *object_ix = NULL;
static int object_ix_hashsz = 0;

/*
 * Pack index for existing packs give us easy access to the offsets into
 * corresponding pack file where each object's data starts, but the entries
 * do not store the size of the compressed representation (uncompressed
 * size is easily available by examining the pack entry header).  We build
 * a hashtable of existing packs (pack_revindex), and keep reverse index
 * here -- pack index file is sorted by object name mapping to offset; this
 * pack_revindex[].revindex array is an ordered list of offsets, so if you
 * know the offset of an object, next offset is where its packed
 * representation ends.
 */
struct pack_revindex {
	struct packed_git *p;
	unsigned long *revindex;
} *pack_revindex = NULL;
static int pack_revindex_hashsz = 0;

/*
 * stats
 */
static int written = 0;
static int written_delta = 0;
static int reused = 0;
static int reused_delta = 0;

static int pack_revindex_ix(struct packed_git *p)
{
	unsigned int ui = (unsigned int) p;
	int i;

	ui = ui ^ (ui >> 16); /* defeat structure alignment */
	i = (int)(ui % pack_revindex_hashsz);
	while (pack_revindex[i].p) {
		if (pack_revindex[i].p == p)
			return i;
		if (++i == pack_revindex_hashsz)
			i = 0;
	}
	return -1 - i;
}

static void prepare_pack_ix(void)
{
	int num;
	struct packed_git *p;
	for (num = 0, p = packed_git; p; p = p->next)
		num++;
	if (!num)
		return;
	pack_revindex_hashsz = num * 11;
	pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz);
	for (p = packed_git; p; p = p->next) {
		num = pack_revindex_ix(p);
		num = - 1 - num;
		pack_revindex[num].p = p;
	}
	/* revindex elements are lazily initialized */
}

static int cmp_offset(const void *a_, const void *b_)
{
	unsigned long a = *(unsigned long *) a_;
	unsigned long b = *(unsigned long *) b_;
	if (a < b)
		return -1;
	else if (a == b)
		return 0;
	else
		return 1;
}

/*
 * Ordered list of offsets of objects in the pack.
 */
static void prepare_pack_revindex(struct pack_revindex *rix)
{
	struct packed_git *p = rix->p;
	int num_ent = num_packed_objects(p);
	int i;
	void *index = p->index_base + 256;

	rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1));
	for (i = 0; i < num_ent; i++) {
		long hl = *((long *)(index + 24 * i));
		rix->revindex[i] = ntohl(hl);
	}
	/* This knows the pack format -- the 20-byte trailer
	 * follows immediately after the last object data.
	 */
	rix->revindex[num_ent] = p->pack_size - 20;
	qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset);
}

static unsigned long find_packed_object_size(struct packed_git *p,
					     unsigned long ofs)
{
	int num;
	int lo, hi;
	struct pack_revindex *rix;
	unsigned long *revindex;
	num = pack_revindex_ix(p);
	if (num < 0)
		die("internal error: pack revindex uninitialized");
	rix = &pack_revindex[num];
	if (!rix->revindex)
		prepare_pack_revindex(rix);
	revindex = rix->revindex;
	lo = 0;
	hi = num_packed_objects(p) + 1;
	do {
		int mi = (lo + hi) / 2;
		if (revindex[mi] == ofs) {
			return revindex[mi+1] - ofs;
		}
		else if (ofs < revindex[mi])
			hi = mi;
		else
			lo = mi + 1;
	} while (lo < hi);
	die("internal error: pack revindex corrupt");
}

static void *delta_against(void *buf, unsigned long size, struct object_entry *entry)
{
	unsigned long othersize, delta_size;
	char type[10];
	void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize);
	void *delta_buf;

	if (!otherbuf)
		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
        delta_buf = diff_delta(otherbuf, othersize,
			       buf, size, &delta_size, 0);
        if (!delta_buf || delta_size != entry->delta_size)
        	die("delta size changed");
        free(buf);
        free(otherbuf);
	return delta_buf;
}

/*
 * The per-object header is a pretty dense thing, which is
 *  - first byte: low four bits are "size", then three bits of "type",
 *    and the high bit is "size continues".
 *  - each byte afterwards: low seven bits are size continuation,
 *    with the high bit being "size continues"
 */
static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
{
	int n = 1;
	unsigned char c;

	if (type < OBJ_COMMIT || type > OBJ_DELTA)
		die("bad type %d", type);

	c = (type << 4) | (size & 15);
	size >>= 4;
	while (size) {
		*hdr++ = c | 0x80;
		c = size & 0x7f;
		size >>= 7;
		n++;
	}
	*hdr = c;
	return n;
}

static unsigned long write_object(struct sha1file *f,
				  struct object_entry *entry)
{
	unsigned long size;
	char type[10];
	void *buf;
	unsigned char header[10];
	unsigned hdrlen, datalen;
	enum object_type obj_type;
	int to_reuse = 0;

	if (entry->preferred_base)
		return 0;

	obj_type = entry->type;
	if (! entry->in_pack)
		to_reuse = 0;	/* can't reuse what we don't have */
	else if (obj_type == OBJ_DELTA)
		to_reuse = 1;	/* check_object() decided it for us */
	else if (obj_type != entry->in_pack_type)
		to_reuse = 0;	/* pack has delta which is unusable */
	else if (entry->delta)
		to_reuse = 0;	/* we want to pack afresh */
	else
		to_reuse = 1;	/* we have it in-pack undeltified,
				 * and we do not need to deltify it.
				 */

	if (! to_reuse) {
		buf = read_sha1_file(entry->sha1, type, &size);
		if (!buf)
			die("unable to read %s", sha1_to_hex(entry->sha1));
		if (size != entry->size)
			die("object %s size inconsistency (%lu vs %lu)",
			    sha1_to_hex(entry->sha1), size, entry->size);
		if (entry->delta) {
			buf = delta_against(buf, size, entry);
			size = entry->delta_size;
			obj_type = OBJ_DELTA;
		}
		/*
		 * The object header is a byte of 'type' followed by zero or
		 * more bytes of length.  For deltas, the 20 bytes of delta
		 * sha1 follows that.
		 */
		hdrlen = encode_header(obj_type, size, header);
		sha1write(f, header, hdrlen);

		if (entry->delta) {
			sha1write(f, entry->delta, 20);
			hdrlen += 20;
		}
		datalen = sha1write_compressed(f, buf, size);
		free(buf);
	}
	else {
		struct packed_git *p = entry->in_pack;
		use_packed_git(p);

		datalen = find_packed_object_size(p, entry->in_pack_offset);
		buf = p->pack_base + entry->in_pack_offset;
		sha1write(f, buf, datalen);
		unuse_packed_git(p);
		hdrlen = 0; /* not really */
		if (obj_type == OBJ_DELTA)
			reused_delta++;
		reused++;
	}
	if (obj_type == OBJ_DELTA)
		written_delta++;
	written++;
	return hdrlen + datalen;
}

static unsigned long write_one(struct sha1file *f,
			       struct object_entry *e,
			       unsigned long offset)
{
	if (e->offset)
		/* offset starts from header size and cannot be zero
		 * if it is written already.
		 */
		return offset;
	e->offset = offset;
	offset += write_object(f, e);
	/* if we are deltified, write out its base object. */
	if (e->delta)
		offset = write_one(f, e->delta, offset);
	return offset;
}

static void write_pack_file(void)
{
	int i;
	struct sha1file *f;
	unsigned long offset;
	struct pack_header hdr;
	unsigned last_percent = 999;
	int do_progress = 0;

	if (!base_name)
		f = sha1fd(1, "<stdout>");
	else {
		f = sha1create("%s-%s.%s", base_name,
			       sha1_to_hex(object_list_sha1), "pack");
		do_progress = progress;
	}
	if (do_progress)
		fprintf(stderr, "Writing %d objects.\n", nr_result);

	hdr.hdr_signature = htonl(PACK_SIGNATURE);
	hdr.hdr_version = htonl(PACK_VERSION);
	hdr.hdr_entries = htonl(nr_result);
	sha1write(f, &hdr, sizeof(hdr));
	offset = sizeof(hdr);
	if (!nr_result)
		goto done;
	for (i = 0; i < nr_objects; i++) {
		offset = write_one(f, objects + i, offset);
		if (do_progress) {
			unsigned percent = written * 100 / nr_result;
			if (progress_update || percent != last_percent) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
					percent, written, nr_result);
				progress_update = 0;
				last_percent = percent;
			}
		}
	}
	if (do_progress)
		fputc('\n', stderr);
 done:
	sha1close(f, pack_file_sha1, 1);
}

static void write_index_file(void)
{
	int i;
	struct sha1file *f = sha1create("%s-%s.%s", base_name,
					sha1_to_hex(object_list_sha1), "idx");
	struct object_entry **list = sorted_by_sha;
	struct object_entry **last = list + nr_result;
	unsigned int array[256];

	/*
	 * Write the first-level table (the list is sorted,
	 * but we use a 256-entry lookup to be able to avoid
	 * having to do eight extra binary search iterations).
	 */
	for (i = 0; i < 256; i++) {
		struct object_entry **next = list;
		while (next < last) {
			struct object_entry *entry = *next;
			if (entry->sha1[0] != i)
				break;
			next++;
		}
		array[i] = htonl(next - sorted_by_sha);
		list = next;
	}
	sha1write(f, array, 256 * sizeof(int));

	/*
	 * Write the actual SHA1 entries..
	 */
	list = sorted_by_sha;
	for (i = 0; i < nr_result; i++) {
		struct object_entry *entry = *list++;
		unsigned int offset = htonl(entry->offset);
		sha1write(f, &offset, 4);
		sha1write(f, entry->sha1, 20);
	}
	sha1write(f, pack_file_sha1, 20);
	sha1close(f, NULL, 1);
}

static int locate_object_entry_hash(const unsigned char *sha1)
{
	int i;
	unsigned int ui;
	memcpy(&ui, sha1, sizeof(unsigned int));
	i = ui % object_ix_hashsz;
	while (0 < object_ix[i]) {
		if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20))
			return i;
		if (++i == object_ix_hashsz)
			i = 0;
	}
	return -1 - i;
}

static struct object_entry *locate_object_entry(const unsigned char *sha1)
{
	int i;

	if (!object_ix_hashsz)
		return NULL;

	i = locate_object_entry_hash(sha1);
	if (0 <= i)
		return &objects[object_ix[i]-1];
	return NULL;
}

static void rehash_objects(void)
{
	int i;
	struct object_entry *oe;

	object_ix_hashsz = nr_objects * 3;
	if (object_ix_hashsz < 1024)
		object_ix_hashsz = 1024;
	object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz);
	object_ix = memset(object_ix, 0, sizeof(int) * object_ix_hashsz);
	for (i = 0, oe = objects; i < nr_objects; i++, oe++) {
		int ix = locate_object_entry_hash(oe->sha1);
		if (0 <= ix)
			continue;
		ix = -1 - ix;
		object_ix[ix] = i + 1;
	}
}

struct name_path {
	struct name_path *up;
	const char *elem;
	int len;
};

#define DIRBITS 12

static unsigned name_hash(struct name_path *path, const char *name)
{
	struct name_path *p = path;
	const char *n = name + strlen(name);
	unsigned hash = 0, name_hash = 0, name_done = 0;

	if (n != name && n[-1] == '\n')
		n--;
	while (name <= --n) {
		unsigned char c = *n;
		if (c == '/' && !name_done) {
			name_hash = hash;
			name_done = 1;
			hash = 0;
		}
		hash = hash * 11 + c;
	}
	if (!name_done) {
		name_hash = hash;
		hash = 0;
	}
	for (p = path; p; p = p->up) {
		hash = hash * 11 + '/';
		n = p->elem + p->len;
		while (p->elem <= --n) {
			unsigned char c = *n;
			hash = hash * 11 + c;
		}
	}
	/*
	 * Make sure "Makefile" and "t/Makefile" are hashed separately
	 * but close enough.
	 */
	hash = (name_hash<<DIRBITS) | (hash & ((1U<<DIRBITS )-1));

	if (0) { /* debug */
		n = name + strlen(name);
		if (n != name && n[-1] == '\n')
			n--;
		while (name <= --n)
			fputc(*n, stderr);
		for (p = path; p; p = p->up) {
			fputc('/', stderr);
			n = p->elem + p->len;
			while (p->elem <= --n)
				fputc(*n, stderr);
		}
		fprintf(stderr, "\t%08x\n", hash);
	}
	return hash;
}

static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude)
{
	unsigned int idx = nr_objects;
	struct object_entry *entry;
	struct packed_git *p;
	unsigned int found_offset = 0;
	struct packed_git *found_pack = NULL;
	int ix, status = 0;

	if (!exclude) {
		for (p = packed_git; p; p = p->next) {
			struct pack_entry e;
			if (find_pack_entry_one(sha1, &e, p)) {
				if (incremental)
					return 0;
				if (local && !p->pack_local)
					return 0;
				if (!found_pack) {
					found_offset = e.offset;
					found_pack = e.p;
				}
			}
		}
	}
	if ((entry = locate_object_entry(sha1)) != NULL)
		goto already_added;

	if (idx >= nr_alloc) {
		unsigned int needed = (idx + 1024) * 3 / 2;
		objects = xrealloc(objects, needed * sizeof(*entry));
		nr_alloc = needed;
	}
	entry = objects + idx;
	nr_objects = idx + 1;
	memset(entry, 0, sizeof(*entry));
	memcpy(entry->sha1, sha1, 20);
	entry->hash = hash;

	if (object_ix_hashsz * 3 <= nr_objects * 4)
		rehash_objects();
	else {
		ix = locate_object_entry_hash(entry->sha1);
		if (0 <= ix)
			die("internal error in object hashing.");
		object_ix[-1 - ix] = idx + 1;
	}
	status = 1;

 already_added:
	if (progress_update) {
		fprintf(stderr, "Counting objects...%d\r", nr_objects);
		progress_update = 0;
	}
	if (exclude)
		entry->preferred_base = 1;
	else {
		if (found_pack) {
			entry->in_pack = found_pack;
			entry->in_pack_offset = found_offset;
		}
	}
	return status;
}

static void add_pbase_tree(struct tree_desc *tree, struct name_path *up)
{
	while (tree->size) {
		const unsigned char *sha1;
		const char *name;
		unsigned mode, hash;
		unsigned long size;
		char type[20];

		sha1 = tree_entry_extract(tree, &name, &mode);
		update_tree_entry(tree);
		if (!has_sha1_file(sha1))
			continue;
		if (sha1_object_info(sha1, type, &size))
			continue;

		hash = name_hash(up, name);
		if (!add_object_entry(sha1, hash, 1))
			continue;

		if (!strcmp(type, "tree")) {
			struct tree_desc sub;
			void *elem;
			struct name_path me;

			elem = read_sha1_file(sha1, type, &sub.size);
			sub.buf = elem;
			if (sub.buf) {
				me.up = up;
				me.elem = name;
				me.len = strlen(name);
				add_pbase_tree(&sub, &me);
				free(elem);
			}
		}
	}
}

static void add_preferred_base(unsigned char *sha1)
{
	struct tree_desc tree;
	void *elem;

	elem = read_object_with_reference(sha1, "tree", &tree.size, NULL);
	tree.buf = elem;
	if (!tree.buf)
		return;
	if (add_object_entry(sha1, name_hash(NULL, ""), 1))
		add_pbase_tree(&tree, NULL);
	free(elem);
}

static void check_object(struct object_entry *entry)
{
	char type[20];

	if (entry->in_pack && !entry->preferred_base) {
		unsigned char base[20];
		unsigned long size;
		struct object_entry *base_entry;

		/* We want in_pack_type even if we do not reuse delta.
		 * There is no point not reusing non-delta representations.
		 */
		check_reuse_pack_delta(entry->in_pack,
				       entry->in_pack_offset,
				       base, &size,
				       &entry->in_pack_type);

		/* Check if it is delta, and the base is also an object
		 * we are going to pack.  If so we will reuse the existing
		 * delta.
		 */
		if (!no_reuse_delta &&
		    entry->in_pack_type == OBJ_DELTA &&
		    (base_entry = locate_object_entry(base)) &&
		    (!base_entry->preferred_base)) {

			/* Depth value does not matter - find_deltas()
			 * will never consider reused delta as the
			 * base object to deltify other objects
			 * against, in order to avoid circular deltas.
			 */

			/* uncompressed size of the delta data */
			entry->size = entry->delta_size = size;
			entry->delta = base_entry;
			entry->type = OBJ_DELTA;

			entry->delta_sibling = base_entry->delta_child;
			base_entry->delta_child = entry;

			return;
		}
		/* Otherwise we would do the usual */
	}

	if (sha1_object_info(entry->sha1, type, &entry->size))
		die("unable to get type of object %s",
		    sha1_to_hex(entry->sha1));

	if (!strcmp(type, "commit")) {
		entry->type = OBJ_COMMIT;
	} else if (!strcmp(type, "tree")) {
		entry->type = OBJ_TREE;
	} else if (!strcmp(type, "blob")) {
		entry->type = OBJ_BLOB;
	} else if (!strcmp(type, "tag")) {
		entry->type = OBJ_TAG;
	} else
		die("unable to pack object %s of type %s",
		    sha1_to_hex(entry->sha1), type);
}

static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
{
	struct object_entry *child = me->delta_child;
	unsigned int m = n;
	while (child) {
		unsigned int c = check_delta_limit(child, n + 1);
		if (m < c)
			m = c;
		child = child->delta_sibling;
	}
	return m;
}

static void get_object_details(void)
{
	int i;
	struct object_entry *entry;

	prepare_pack_ix();
	for (i = 0, entry = objects; i < nr_objects; i++, entry++)
		check_object(entry);

	if (nr_objects == nr_result) {
		/*
		 * Depth of objects that depend on the entry -- this
		 * is subtracted from depth-max to break too deep
		 * delta chain because of delta data reusing.
		 * However, we loosen this restriction when we know we
		 * are creating a thin pack -- it will have to be
		 * expanded on the other end anyway, so do not
		 * artificially cut the delta chain and let it go as
		 * deep as it wants.
		 */
		for (i = 0, entry = objects; i < nr_objects; i++, entry++)
			if (!entry->delta && entry->delta_child)
				entry->delta_limit =
					check_delta_limit(entry, 1);
	}
}

typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

static entry_sort_t current_sort;

static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *a = *(struct object_entry **)_a;
	struct object_entry *b = *(struct object_entry **)_b;
	return current_sort(a,b);
}

static struct object_entry **create_sorted_list(entry_sort_t sort)
{
	struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *));
	int i;

	for (i = 0; i < nr_objects; i++)
		list[i] = objects + i;
	current_sort = sort;
	qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator);
	return list;
}

static int sha1_sort(const struct object_entry *a, const struct object_entry *b)
{
	return memcmp(a->sha1, b->sha1, 20);
}

static struct object_entry **create_final_object_list()
{
	struct object_entry **list;
	int i, j;

	for (i = nr_result = 0; i < nr_objects; i++)
		if (!objects[i].preferred_base)
			nr_result++;
	list = xmalloc(nr_result * sizeof(struct object_entry *));
	for (i = j = 0; i < nr_objects; i++) {
		if (!objects[i].preferred_base)
			list[j++] = objects + i;
	}
	current_sort = sha1_sort;
	qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator);
	return list;
}

static int type_size_sort(const struct object_entry *a, const struct object_entry *b)
{
	if (a->type < b->type)
		return -1;
	if (a->type > b->type)
		return 1;
	if (a->hash < b->hash)
		return -1;
	if (a->hash > b->hash)
		return 1;
	if (a->preferred_base < b->preferred_base)
		return -1;
	if (a->preferred_base > b->preferred_base)
		return 1;
	if (a->size < b->size)
		return -1;
	if (a->size > b->size)
		return 1;
	return a < b ? -1 : (a > b);
}

struct unpacked {
	struct object_entry *entry;
	void *data;
};

/*
 * We search for deltas _backwards_ in a list sorted by type and
 * by size, so that we see progressively smaller and smaller files.
 * That's because we prefer deltas to be from the bigger file
 * to the smaller - deletes are potentially cheaper, but perhaps
 * more importantly, the bigger file is likely the more recent
 * one.
 */
static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth)
{
	struct object_entry *cur_entry = cur->entry;
	struct object_entry *old_entry = old->entry;
	int old_preferred = (old_entry->preferred_base ||
			     old_entry->based_on_preferred);
	unsigned long size, oldsize, delta_size, sizediff;
	long max_size;
	void *delta_buf;

	/* Don't bother doing diffs between different types */
	if (cur_entry->type != old_entry->type)
		return -1;

	/* We do not compute delta to *create* objects we are not
	 * going to pack.
	 */
	if (cur_entry->preferred_base)
		return -1;

	/* If the current object is at pack edge, take the depth the
	 * objects that depend on the current object into account --
	 * otherwise they would become too deep.
	 */
	if (cur_entry->delta_child) {
		if (max_depth <= cur_entry->delta_limit)
			return 0;
		max_depth -= cur_entry->delta_limit;
	}

	size = cur_entry->size;
	if (size < 50)
		return -1;
	oldsize = old_entry->size;
	sizediff = oldsize > size ? oldsize - size : size - oldsize;
	if (sizediff > size / 8)
		return -1;
	if (old_entry->depth >= max_depth)
		return 0;

	/*
	 * NOTE!
	 *
	 * We always delta from the bigger to the smaller, since that's
	 * more space-efficient (deletes don't have to say _what_ they
	 * delete).
	 */
	max_size = size / 2 - 20;
	if (cur_entry->delta) {
		if (cur_entry->based_on_preferred) {
			if (old_preferred)
				max_size = cur_entry->delta_size-1;
			else
				/* trying with non-preferred one when we
				 * already have a delta based on preferred
				 * one is pointless.
				 */
				return -1;
		}
		else if (!old_preferred)
			max_size = cur_entry->delta_size-1;
		else
			/* otherwise...  even if delta with a
			 * preferred one produces a bigger result than
			 * what we currently have, which is based on a
			 * non-preferred one, it is OK.
			 */
			;
	}
	if (sizediff >= max_size)
		return -1;
	delta_buf = diff_delta(old->data, oldsize,
			       cur->data, size, &delta_size, max_size);
	if (!delta_buf)
		return 0;
	cur_entry->delta = old_entry;
	cur_entry->delta_size = delta_size;
	cur_entry->depth = old_entry->depth + 1;
	cur_entry->based_on_preferred = old_preferred;
	free(delta_buf);
	return 0;
}

static void progress_interval(int signum)
{
	signal(SIGALRM, progress_interval);
	progress_update = 1;
}

static void find_deltas(struct object_entry **list, int window, int depth)
{
	int i, idx;
	unsigned int array_size = window * sizeof(struct unpacked);
	struct unpacked *array = xmalloc(array_size);
	unsigned processed = 0;
	unsigned last_percent = 999;

	memset(array, 0, array_size);
	i = nr_objects;
	idx = 0;
	if (progress)
		fprintf(stderr, "Deltifying %d objects.\n", nr_result);

	while (--i >= 0) {
		struct object_entry *entry = list[i];
		struct unpacked *n = array + idx;
		unsigned long size;
		char type[10];
		int j;

		if (!entry->preferred_base)
			processed++;

		if (progress) {
			unsigned percent = processed * 100 / nr_result;
			if (percent != last_percent || progress_update) {
				fprintf(stderr, "%4u%% (%u/%u) done\r",
					percent, processed, nr_result);
				progress_update = 0;
				last_percent = percent;
			}
		}

		if (entry->delta)
			/* This happens if we decided to reuse existing
			 * delta from a pack.  "!no_reuse_delta &&" is implied.
			 */
			continue;

		free(n->data);
		n->entry = entry;
		n->data = read_sha1_file(entry->sha1, type, &size);
		if (size != entry->size)
			die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);

		j = window;
		while (--j > 0) {
			unsigned int other_idx = idx + j;
			struct unpacked *m;
			if (other_idx >= window)
				other_idx -= window;
			m = array + other_idx;
			if (!m->entry)
				break;
			if (try_delta(n, m, depth) < 0)
				break;
		}
		idx++;
		if (idx >= window)
			idx = 0;
	}

	if (progress)
		fputc('\n', stderr);

	for (i = 0; i < window; ++i)
		free(array[i].data);
	free(array);
}

static void prepare_pack(int window, int depth)
{
	get_object_details();
	sorted_by_type = create_sorted_list(type_size_sort);
	if (window && depth)
		find_deltas(sorted_by_type, window+1, depth);
}

static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout)
{
	static const char cache[] = "pack-cache/pack-%s.%s";
	char *cached_pack, *cached_idx;
	int ifd, ofd, ifd_ix = -1;

	cached_pack = git_path(cache, sha1_to_hex(sha1), "pack");
	ifd = open(cached_pack, O_RDONLY);
	if (ifd < 0)
		return 0;

	if (!pack_to_stdout) {
		cached_idx = git_path(cache, sha1_to_hex(sha1), "idx");
		ifd_ix = open(cached_idx, O_RDONLY);
		if (ifd_ix < 0) {
			close(ifd);
			return 0;
		}
	}

	if (progress)
		fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects,
			sha1_to_hex(sha1));

	if (pack_to_stdout) {
		if (copy_fd(ifd, 1))
			exit(1);
		close(ifd);
	}
	else {
		char name[PATH_MAX];
		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd, ofd))
			exit(1);
		close(ifd);

		snprintf(name, sizeof(name),
			 "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx");
		ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666);
		if (ofd < 0)
			die("unable to open %s (%s)", name, strerror(errno));
		if (copy_fd(ifd_ix, ofd))
			exit(1);
		close(ifd_ix);
		puts(sha1_to_hex(sha1));
	}

	return 1;
}

int main(int argc, char **argv)
{
	SHA_CTX ctx;
	char line[PATH_MAX + 20];
	int window = 10, depth = 10, pack_to_stdout = 0;
	struct object_entry **list;
	int i;

	setup_git_directory();

	for (i = 1; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
			if (!strcmp("--non-empty", arg)) {
				non_empty = 1;
				continue;
			}
			if (!strcmp("--local", arg)) {
				local = 1;
				continue;
			}
			if (!strcmp("--incremental", arg)) {
				incremental = 1;
				continue;
			}
			if (!strncmp("--window=", arg, 9)) {
				char *end;
				window = strtoul(arg+9, &end, 0);
				if (!arg[9] || *end)
					usage(pack_usage);
				continue;
			}
			if (!strncmp("--depth=", arg, 8)) {
				char *end;
				depth = strtoul(arg+8, &end, 0);
				if (!arg[8] || *end)
					usage(pack_usage);
				continue;
			}
			if (!strcmp("-q", arg)) {
				progress = 0;
				continue;
			}
			if (!strcmp("--no-reuse-delta", arg)) {
				no_reuse_delta = 1;
				continue;
			}
			if (!strcmp("--stdout", arg)) {
				pack_to_stdout = 1;
				continue;
			}
			usage(pack_usage);
		}
		if (base_name)
			usage(pack_usage);
		base_name = arg;
	}

	if (pack_to_stdout != !base_name)
		usage(pack_usage);

	prepare_packed_git();

	if (progress) {
		struct itimerval v;
		v.it_interval.tv_sec = 1;
		v.it_interval.tv_usec = 0;
		v.it_value = v.it_interval;
		signal(SIGALRM, progress_interval);
		setitimer(ITIMER_REAL, &v, NULL);
		fprintf(stderr, "Generating pack...\n");
	}

	while (fgets(line, sizeof(line), stdin) != NULL) {
		unsigned char sha1[20];

		if (line[0] == '-') {
			if (get_sha1_hex(line+1, sha1))
				die("expected edge sha1, got garbage:\n %s",
				    line+1);
			add_preferred_base(sha1);
			continue;
		}
		if (get_sha1_hex(line, sha1))
			die("expected sha1, got garbage:\n %s", line);
		add_object_entry(sha1, name_hash(NULL, line+41), 0);
	}
	if (progress)
		fprintf(stderr, "Done counting %d objects.\n", nr_objects);
	sorted_by_sha = create_final_object_list();
	if (non_empty && !nr_result)
		return 0;

	SHA1_Init(&ctx);
	list = sorted_by_sha;
	for (i = 0; i < nr_result; i++) {
		struct object_entry *entry = *list++;
		SHA1_Update(&ctx, entry->sha1, 20);
	}
	SHA1_Final(object_list_sha1, &ctx);
	if (progress && (nr_objects != nr_result))
		fprintf(stderr, "Result has %d objects.\n", nr_result);

	if (reuse_cached_pack(object_list_sha1, pack_to_stdout))
		;
	else {
		if (nr_result)
			prepare_pack(window, depth);
		if (progress && pack_to_stdout) {
			/* the other end usually displays progress itself */
			struct itimerval v = {{0,},};
			setitimer(ITIMER_REAL, &v, NULL);
			signal(SIGALRM, SIG_IGN );
			progress_update = 0;
		}
		write_pack_file();
		if (!pack_to_stdout) {
			write_index_file();
			puts(sha1_to_hex(object_list_sha1));
		}
	}
	if (progress)
		fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n",
			nr_result, written, written_delta, reused, reused_delta);
	return 0;
}

^ permalink raw reply related

* Re: parsecvs tool now creates git repositories
From: Jan-Benedict Glaw @ 2006-04-03  7:25 UTC (permalink / raw)
  To: Keith Packard; +Cc: Git Mailing List
In-Reply-To: <1144037456.2303.92.camel@neko.keithp.com>

[-- Attachment #1: Type: text/plain, Size: 1475 bytes --]

On Sun, 2006-04-02 21:10:56 -0700, Keith Packard <keithp@keithp.com> wrote:
> On Sun, 2006-04-02 at 21:31 +0200, Jan-Benedict Glaw wrote:
> > lex.l: In function ‘parse_data’:
> > lex.l:90: error: ‘yytext_ptr’ undeclared (first use in this function)
> > lex.l:90: error: (Each undeclared identifier is reported only once
> > lex.l:90: error: for each function it appears in.)
> > make: *** [lex.o] Error 1
> 
> I think this is a bug in your version of flex; I'm using standard lex
> conventions here. I don't know how to make it work for you.

It compiles for me with this patch (thanks to Linus for the hint):

diff --git a/Makefile b/Makefile
index 639353a..b8f5014 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,8 @@ GCC_WARNINGS2=-Wmissing-prototypes -Wmis
 GCC_WARNINGS3=-Wnested-externs -fno-strict-aliasing
 GCC_WARNINGS=$(GCC_WARNINGS1) $(GCC_WARNINGS2) $(GCC_WARNINGS3)
 CFLAGS=-O0 -g $(GCC_WARNINGS)
-YFLAGS=-d
+YFLAGS=-d -l
+LFLAGS=-l
 
 SRCS=gram.y lex.l cvs.h parsecvs.c cvsutil.c revlist.c atom.c revcvs.c git.c
 

Would you please verify that it doesn't break things for you?

Thanks, JBG

-- 
Jan-Benedict Glaw       jbglaw@lug-owl.de    . +49-172-7608481             _ O _
"Eine Freie Meinung in  einem Freien Kopf    | Gegen Zensur | Gegen Krieg  _ _ O
 für einen Freien Staat voll Freier Bürger"  | im Internet! |   im Irak!   O O O
ret = do_actions((curr | FREE_SPEECH) & ~(NEW_COPYRIGHT_LAW | DRM | TCPA));

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 189 bytes --]

^ permalink raw reply related

* Re: [PATCH] Use sigaction and SA_RESTART in read-tree.c; add option in Makefile.
From: Linus Torvalds @ 2006-04-03  4:40 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jason Riedy, git
In-Reply-To: <7v7j67i93f.fsf@assigned-by-dhcp.cox.net>



On Sun, 2 Apr 2006, Junio C Hamano wrote:

> Jason Riedy <ejr@EECS.Berkeley.EDU> writes:
> 
> > Also add a NO_SA_RESTART option in the Makefile in case someone
> > doesn't have SA_RESTART but does restart (maybe older HP/UX?).
> > We want the builder to chose this specifically in case the
> > system both lacks SA_RESTART and does not restart stdio calls;
> > a compat #define in git-compat-utils.h would silently allow
> > broken systems.
> 
> What am I missing...?

I don't think this part is worth it at least for now.

If there really are systems without SA_RESTART, they probably don't have 
sigaction() either, so #defining SA_RESTART to zero likely won't help.

But somebody can prove me wrong..

		Linus

^ permalink raw reply

* Re: parsecvs tool now creates git repositories
From: Linus Torvalds @ 2006-04-03  4:38 UTC (permalink / raw)
  To: Keith Packard; +Cc: Jan-Benedict Glaw, Git Mailing List
In-Reply-To: <1144037456.2303.92.camel@neko.keithp.com>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2111 bytes --]



On Sun, 2 Apr 2006, Keith Packard wrote:

> On Sun, 2006-04-02 at 21:31 +0200, Jan-Benedict Glaw wrote:
> 
> > lex.l: In function ÿÿparse_dataÿÿ:
> > lex.l:90: error: ÿÿyytext_ptrÿÿ undeclared (first use in this function)
> > lex.l:90: error: (Each undeclared identifier is reported only once
> > lex.l:90: error: for each function it appears in.)
> > make: *** [lex.o] Error 1
> 
> I think this is a bug in your version of flex; I'm using standard lex
> conventions here. I don't know how to make it work for you.

I need something like this to make it work with flex/lex..

The "-l" tells flex to be more traditional.

The "clean" rule is obvious.

And the "yylineno" is a lot more traditional than yyget_lineno(), which 
doesn't work for me at all. I think that's some issue with flex' support 
for re-entrant parsers.

Whether it works after this, I dunno. But at least it compiles.

		Linus

---
diff --git a/Makefile b/Makefile
index 639353a..c7e04a5 100644
--- a/Makefile
+++ b/Makefile
@@ -4,6 +4,7 @@ GCC_WARNINGS3=-Wnested-externs -fno-stri
 GCC_WARNINGS=$(GCC_WARNINGS1) $(GCC_WARNINGS2) $(GCC_WARNINGS3)
 CFLAGS=-O0 -g $(GCC_WARNINGS)
 YFLAGS=-d
+LFLAGS=-l
 
 SRCS=gram.y lex.l cvs.h parsecvs.c cvsutil.c revlist.c atom.c revcvs.c git.c
 
@@ -20,4 +21,4 @@ lex.o: lex.c
 y.tab.h: gram.c
 
 clean:
-	rm -f $(OBJS) y.tab.h gram.c parsecvs
+	rm -f $(OBJS) y.tab.h gram.c parsecvs lex.c
diff --git a/lex.l b/lex.l
index 39cafb0..c7833a4 100644
--- a/lex.l
+++ b/lex.l
@@ -65,8 +65,7 @@ parse_data (int save);
 \t				;
 \n				;
 .				{ 
-				    fprintf (stderr, "%s: (%d) ignoring %c\n", 
-					     yyfilename, yyget_lineno (),
+				    fprintf (stderr, "%s: (%d) ignoring %c\n", yyfilename, yylineno,
 					     yytext[0]);
 				}
 %%
@@ -146,8 +145,7 @@ lex_date (cvs_number *n)
 	d = mktime (&tm);
 	if (d == 0) {
 	    int i;
-	    fprintf (stderr, "%s: (%d) unparsable date: ", yyfilename,
-		     yyget_lineno ());
+	    fprintf (stderr, "%s: (%d) unparsable date: ", yyfilename, yylineno);
 	    for (i = 0; i < n->c; i++) {
 		if (i) fprintf (stderr, ".");
 		fprintf (stderr, "%d", n->n[i]);

^ permalink raw reply related

* Re: [RFH] xdiff shows trivially redundant diff.
From: Davide Libenzi @ 2006-04-03  4:30 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Junio C Hamano, git
In-Reply-To: <Pine.LNX.4.64.0604022116060.3781@g5.osdl.org>

On Sun, 2 Apr 2006, Linus Torvalds wrote:

> On Sun, 2 Apr 2006, Davide Libenzi wrote:
>>
>> Tomorrow I'll take a look at it.
>
> Thanks. I've made the first "release" (2.6.17-rc1) with the new built-in
> diff, let's see if somebody has any issues.

No problem. That's only an eye-issue though, since the diff is still a 
valid diff according to its definition where D=A-B => B+D==A && A-D==B
From the day I released 0.18, xregression is continuosly running w/out any 
issue. I'll check it out though ...



- Davide

^ permalink raw reply

* Re: [PATCH] revision: simplify argument parsing.
From: Junio C Hamano @ 2006-04-03  4:22 UTC (permalink / raw)
  To: Rene Scharfe; +Cc: Linus Torvalds, git
In-Reply-To: <443063E2.1040904@lsrfire.ath.cx>

Rene Scharfe <rene.scharfe@lsrfire.ath.cx> writes:

> The comment is misleading because the patch not only moves stuff around,
> it also removes setting of revs->limited when revs->min_age has been
> set.

You are correct that the patch does more than it claims to.

That is an independent fix mentioned in my earlier message
<7vr74jxhp3.fsf@assigned-by-dhcp.cox.net>.  We did not have to
pay the overhead of having to call limit_list() when only
min_age was specified because we did not do traversal filtering
using UNINTERESTING logic for min_age, but we called it anyway.
Unlike --max-age that marks an old one _and_ its parents
uninteresting, it needs to filter a young one without making its
parents uninteresting.

^ permalink raw reply

* Re: [PATCH] git-clone: fix handling of upsteram whose HEAD does not point at master.
From: Junio C Hamano @ 2006-04-03  4:22 UTC (permalink / raw)
  To: git
In-Reply-To: <7vu09bimj9.fsf@assigned-by-dhcp.cox.net>

Junio C Hamano <junkio@cox.net> writes:

> When cloning from a remote repository that has master, main, and
> origin branches _and_ with the HEAD pointing at main branch, we
> did quite confused things during clone.  So this cleans things
> up.  The behaviour is a bit different between separate remotes/
> layout and the mixed branches layout.
>
> The newer layout with $GIT_DIR/refs/remotes/$origin/, things are
> simpler and more transparent:
>
>  - remote branches are copied to refs/remotes/$origin/.
>  - HEAD points at refs/heads/master, which starts at where the
>    remote HEAD points at.
>  - $GIT_DIR/remotes/$origin file is set up to fetch all remote
>    branches, and merge the branch HEAD pointed at at the time of
>    the cloning.

BTW, the behaviour under --use-separate-remote may be made more
consistent with the traditional layout case by using the same
branch name as the original repository even in that case.

On top of the previous patch, something like the attached one.
I'll put this in "next" and see how it goes, unless anybody
complains.

---

diff --git a/git-clone.sh b/git-clone.sh
index 9cc374e..c013e48 100755
--- a/git-clone.sh
+++ b/git-clone.sh
@@ -376,15 +376,11 @@ then
 	case "$head_points_at" in
 	?*)
 		mkdir -p "$GIT_DIR/remotes" &&
+		git-symbolic-ref HEAD "refs/heads/$head_points_at" &&
 		case "$use_separate_remote" in
-		# With separete-remote, our primary branch is always
-		# at 'master'
 		t)	origin_track="$remote_top/$head_points_at"
 			git-update-ref HEAD "$head_sha1" ;;
-		# Otherwise our primary branch is the same as the remote;
-		# we track upstream with $origin.
 		*)	origin_track="$remote_top/$origin"
-			git-symbolic-ref HEAD "refs/heads/$head_points_at"
 			git-update-ref "refs/heads/$origin" "$head_sha1" ;;
 		esac &&
 		echo >"$GIT_DIR/remotes/$origin" \

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox