Git development

Git development
 help / color / mirror / Atom feed

* [PATCH] Kill a bunch of pointer sign warnings for gcc4
From: Brian Gerst @ 2005-05-18 12:14 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 417 bytes --]

- Raw hashes should be unsigned char.
- String functions want signed char.
- Hash and compress functions want unsigned char.

Signed-off-by: Brian Gerst <bgerst@didntduck.org>
----------------------------

PS.
tar-tree.c: In function ‘main’:
tar-tree.c:437: warning: pointer targets in passing argument 1 of 
‘write_header’ differ in signedness

The "0" looks bogus, since it should be a raw hash not a text string.


[-- Attachment #2: ptrsign.diff --]
[-- Type: text/x-patch, Size: 8063 bytes --]

Kill a bunch of pointer sign warnings for gcc4.

---
commit 9d6d4056081ea693b9d0b28a1507921328df0b26
tree 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3
parent 02481aec2a2cfce7bc47d0d10876be5507f0b7ba
author <bgerst@citadel.(none)> Wed, 18 May 2005 07:59:28 -0400
committer <bgerst@citadel.(none)> Wed, 18 May 2005 07:59:28 -0400

 cache.h      |    4 ++--
 diff-cache.c |    2 +-
 diff-files.c |    4 ++--
 http-pull.c  |    4 ++--
 ls-tree.c    |    2 +-
 read-cache.c |    2 +-
 rpush.c      |    2 +-
 sha1_file.c  |   18 +++++++++---------
 strbuf.h     |    2 +-
 tar-tree.c   |    6 +++---
 10 files changed, 23 insertions(+), 23 deletions(-)

Index: cache.h
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/cache.h  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/cache.h  (mode:100644)
@@ -143,7 +143,7 @@
 extern void * map_sha1_file(const unsigned char *sha1, unsigned long *size);
 extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
 extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
-extern int write_sha1_file(char *buf, unsigned long len, const char *type, unsigned char *return_sha1);
+extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
 
 extern int check_sha1_signature(unsigned char *sha1, void *buf, unsigned long size, const char *type);
 
@@ -167,7 +167,7 @@
 extern int cache_name_compare(const char *name1, int len1, const char *name2, int len2);
 
 extern void *read_object_with_reference(const unsigned char *sha1,
-					const unsigned char *required_type,
+					const char *required_type,
 					unsigned long *size,
 					unsigned char *sha1_ret);
 
Index: diff-cache.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/diff-cache.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/diff-cache.c  (mode:100644)
@@ -63,7 +63,7 @@
 {
 	unsigned int mode, oldmode;
 	unsigned char *sha1;
-	unsigned char old_sha1_hex[60];
+	char old_sha1_hex[60];
 
 	if (get_stat_data(new, &sha1, &mode) < 0) {
 		if (report_missing)
Index: diff-files.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/diff-files.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/diff-files.c  (mode:100644)
@@ -48,7 +48,7 @@
 }
 
 static void show_modified(int oldmode, int mode,
-			  const char *old_sha1, const char *sha1,
+			  const unsigned char *old_sha1, const unsigned char *sha1,
 			  char *path)
 {
 	char old_sha1_hex[41];
@@ -64,7 +64,7 @@
 
 int main(int argc, char **argv)
 {
-	static const char null_sha1[20] = { 0, };
+	static const unsigned char null_sha1[20] = { 0, };
 	int entries = read_cache();
 	int i;
 
Index: http-pull.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/http-pull.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/http-pull.c  (mode:100644)
@@ -24,7 +24,7 @@
 
 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb, 
 			       void *data) {
-	char expn[4096];
+	unsigned char expn[4096];
 	size_t size = eltsize * nmemb;
 	int posn = 0;
 	do {
@@ -49,7 +49,7 @@
 {
 	char *hex = sha1_to_hex(sha1);
 	char *filename = sha1_file_name(sha1);
-	char real_sha1[20];
+	unsigned char real_sha1[20];
 	char *url;
 	char *posn;
 
Index: ls-tree.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/ls-tree.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/ls-tree.c  (mode:100644)
@@ -24,7 +24,7 @@
 }
 
 static void list_recursive(void *buffer,
-			   const unsigned char *type,
+			   const char *type,
 			   unsigned long size,
 			   struct path_prefix *prefix)
 {
Index: read-cache.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/read-cache.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/read-cache.c  (mode:100644)
@@ -344,7 +344,7 @@
 }
 
 #define WRITE_BUFFER_SIZE 8192
-static char write_buffer[WRITE_BUFFER_SIZE];
+static unsigned char write_buffer[WRITE_BUFFER_SIZE];
 static unsigned long write_buffer_len;
 
 static int ce_write(SHA_CTX *context, int fd, void *data, unsigned int len)
Index: rpush.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/rpush.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/rpush.c  (mode:100644)
@@ -6,7 +6,7 @@
 void service(int fd_in, int fd_out) {
 	ssize_t size;
 	int posn;
-	char sha1[20];
+	char unsigned sha1[20];
 	unsigned long objsize;
 	void *buf;
 	do {
Index: sha1_file.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/sha1_file.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/sha1_file.c  (mode:100644)
@@ -313,13 +313,13 @@
 	int ret, bytes;
 	z_stream stream;
 	char buffer[8192];
-	char *buf;
+	unsigned char *buf;
 
 	/* Get the data stream */
 	memset(&stream, 0, sizeof(stream));
 	stream.next_in = map;
 	stream.avail_in = mapsize;
-	stream.next_out = buffer;
+	stream.next_out = (unsigned char *)buffer;
 	stream.avail_out = sizeof(buffer);
 
 	inflateInit(&stream);
@@ -359,7 +359,7 @@
 }
 
 void *read_object_with_reference(const unsigned char *sha1,
-				 const unsigned char *required_type,
+				 const char *required_type,
 				 unsigned long *size,
 				 unsigned char *actual_sha1_return)
 {
@@ -403,20 +403,20 @@
 	}
 }
 
-int write_sha1_file(char *buf, unsigned long len, const char *type, unsigned char *returnsha1)
+int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
 {
 	int size;
-	char *compressed;
+	unsigned char *compressed;
 	z_stream stream;
 	unsigned char sha1[20];
 	SHA_CTX c;
 	char *filename;
 	static char tmpfile[PATH_MAX];
-	char hdr[50];
+	unsigned char hdr[50];
 	int fd, hdrlen, ret;
 
 	/* Generate the header */
-	hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
+	hdrlen = sprintf((char *)hdr, "%s %lu", type, len)+1;
 
 	/* Sha1.. */
 	SHA1_Init(&c);
@@ -516,8 +516,8 @@
 	int local;
 	z_stream stream;
 	unsigned char real_sha1[20];
-	char buf[4096];
-	char discard[4096];
+	unsigned char buf[4096];
+	unsigned char discard[4096];
 	int ret;
 	SHA_CTX c;
 
Index: strbuf.h
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/strbuf.h  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/strbuf.h  (mode:100644)
@@ -4,7 +4,7 @@
 	int alloc;
 	int len;
 	int eof;
-	unsigned char *buf;
+	char *buf;
 };
 
 extern void strbuf_init(struct strbuf *);
Index: tar-tree.c
===================================================================
--- 2b3e8f627f4b8338e1479f6011052d2f6c0e2468/tar-tree.c  (mode:100644)
+++ 6da6a42bd7b97ea6ebd79544f4fb16713ac74dc3/tar-tree.c  (mode:100644)
@@ -205,7 +205,7 @@
 	append_char(p, '\n');
 }
 
-static void write_header(const char *, char, const char *, struct path_prefix *,
+static void write_header(const unsigned char *, char, const char *, struct path_prefix *,
                          const char *, unsigned int, void *, unsigned long);
 
 /* stores a pax extended header directly in the block buffer */
@@ -238,7 +238,7 @@
 	free(buffer);
 }
 
-static void write_global_extended_header(const char *sha1)
+static void write_global_extended_header(const unsigned char *sha1)
 {
 	char *p;
 	unsigned int size;
@@ -253,7 +253,7 @@
 }
 
 /* stores a ustar header directly in the block buffer */
-static void write_header(const char *sha1, char typeflag, const char *basepath,
+static void write_header(const unsigned char *sha1, char typeflag, const char *basepath,
                          struct path_prefix *prefix, const char *path,
                          unsigned int mode, void *buffer, unsigned long size)
 {


^ permalink raw reply

* Re: gitweb wishlist
From: Petr Baudis @ 2005-05-18  9:45 UTC (permalink / raw)
  To: Kay Sievers; +Cc: git
In-Reply-To: <1116384951.5094.83.camel@dhcp-188.off.vrfy.org>

Dear diary, on Wed, May 18, 2005 at 04:55:51AM CEST, I got a letter
where Kay Sievers <kay.sievers@vrfy.org> told me that...
> On Wed, 2005-05-11 at 03:26 +0200, Petr Baudis wrote:
> 
> >   I think I would prefer the link from the repository index to go not to
> > the log page, but some "summary" page, which would have some short
> > information about the repository (owner, description, list of branches
> > if gitweb supports that, list of tags, link to the latest tree and link
> > to the log).
> 
> I did this now. The top-link shows now the repository listing with a
> nice "last change" field. The default link points to an overview page
> which also lists the tags.
> (The owner field in that list is not correct until now, cause the
> cron-job needs to be adapted.)

Thanks, this is exactly how I envisioned it. :-)

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply

* Re: [PATCH] improved delta support for git
From: Dan Holmsand @ 2005-05-18  8:54 UTC (permalink / raw)
  To: git
In-Reply-To: <Pine.LNX.4.62.0505180005230.20274@localhost.localdomain>

Nicolas Pitre wrote:
> My goal is to provide the mechanism that can be used by a higher level 
> implementing the deltafication policy.  I only provided one script as an 
> example, but it turns out that you found a way to achieve better space 
> saving.  And I bet you that there are probably ways to do even better 
> with more exhaustive delta targets.  For example you could try all 
> possible combinations on an object list for each file (and let it run 
> overnight).

Well, any kind of deltafication of, say, the complete kernel history 
pretty much has to run overnight anyway :-)

> One thing I've been wondering about is whether gzipping small deltas is 
> actually a gain.  For very small files it seems that gzip is adding more 
> overhead making the compressed file actually larger.  Might be worth 
> storing some deltas uncompressed if the compressed version turns out to 
> be larger.

It's probably better to skip deltafication of very small files 
altogether. Big pain for small gain, and all that.

>>1) I limit the maximum size of any delta to 10% of the size of the new
>>version. That guarantees a big saving, as long as any delta is
>>produced.
> 
> 
> Well, any delta object smaller than its original object saves space, 
> even if it's 75% of the original size. But...

That's not true if you want to keep the delta chain length down (and 
thus performance up).

Then, the most efficient approach is to generate many deltas against the 
same base file (otherwise, you only get 50% delta files with a maximum 
delta depth of 1).

But in this case, the trick is to know when to stop deltafying against 
one base file, and start over with another. If you switch to a new 
keyframe too often, you obviously lose some potential savings. But if 
you don't switch often enough, you end up repeating the same data in too 
many delta files.

A maximum delta size of 10% turned out to be ideal for at least the "fs" 
  tree. 8% was significantly worse, as was 15%. (The ideal size depends 
on  how big the average change is: the smaller the average change, the 
smaller the max delta size should be).

> ... but then the ultimate solution is to try out all possible references 
> within a given list.  My git-deltafy-script already finds out the list 
> of objects belonging to the same file.  Maybe git-mkdelta should try 
> all combinations between them.  This way a deeper delta chain could be 
> allowed for maximum space saving.

Yeah. But then you lose the ability to do incremental deltafication, or 
deltafication on-the-fly. And it would be really, really nice to have 
git do deltas at commit time - that way you could keep the very cool 
"immutable objects" property of git, while still saving a lot of space.

> I will look at it and merge the good stuff.

Cool! Thanks!

/dan

^ permalink raw reply

* [PATCH 1/1] Diff-helper update
From: Junio C Hamano @ 2005-05-18  6:29 UTC (permalink / raw)
  To: torvalds; +Cc: git, pasky

This patch adds a framework and a stub implementation of rename
detection to diff-helper program.

The current stub code is just enough to detect pure renames in
diff-tree output and not fancier.  The plan is perhaps to use
the same delta code when Nico's delta storage patch is merged
for similarity evaluation purposes.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

Documentation/git-diff-helper.txt |   12 ++-
diff-helper.c                     |  141 +++++++++++++++++++++++++++++++++++++-
diff.c                            |   34 +++++----
diff.h                            |    2 
4 files changed, 170 insertions(+), 19 deletions(-)

diff -git a/Documentation/git-diff-helper.txt b/Documentation/git-diff-helper.txt
--- a/Documentation/git-diff-helper.txt
+++ b/Documentation/git-diff-helper.txt
@@ -9,7 +9,7 @@
 
 SYNOPSIS
 --------
-'git-diff-helper' [-z] [-R]
+'git-diff-helper' [-z] [-R] [-r]
 
 DESCRIPTION
 -----------
@@ -28,7 +28,12 @@
 
 		git-diff-cache <tree> | git-diff-helper -R file.c
 
-would show a diff to bring the working file back to what is in the <tree>.
+	would show a diff to bring the working file back to what
+	is in the <tree>.
+
+-r::
+	Detect renames.
+
 
 See Also
 --------
@@ -37,7 +42,8 @@
 
 Author
 ------
-Written by Linus Torvalds <torvalds@osdl.org>
+Written by Junio C Hamano <junkio@cox.net>
+
 
 Documentation
 --------------
diff -git a/diff-helper.c b/diff-helper.c
--- a/diff-helper.c
+++ b/diff-helper.c
@@ -21,6 +21,129 @@
 	return 0;
 }
 
+static int detect_rename = 0;
+
+/*
+ * We do not detect circular renames.  Just hold created and deleted
+ * entries and later attempt to match them up.  If they do not match,
+ * then spit them out as deletes or creates as original.
+ */
+
+static struct diff_spec_hold {
+	struct diff_spec_hold *next;
+	struct diff_spec_hold *matched;
+	struct diff_spec old, new;
+	char path[1];
+} *createdfile, *deletedfile;
+
+static void hold_spec(const char *path,
+		      struct diff_spec *old, struct diff_spec *new)
+{
+	struct diff_spec_hold **list, *elem;
+	list = (! old->file_valid) ? &createdfile : &deletedfile;
+	elem = xmalloc(sizeof(*elem) + strlen(path));
+	strcpy(elem->path, path);
+	elem->next = *list;
+	*list = elem;
+	elem->old = *old;
+	elem->new = *new;
+	elem->matched = 0;
+}
+
+#define MINIMUM_SCORE 7000
+int estimate_similarity(struct diff_spec *one, struct diff_spec *two)
+{
+	/* Return how similar they are, representing the score as an
+	 * integer between 0 and 10000.
+	 *
+	 * This version is very dumb and detects exact matches only.
+	 * Wnen Nico's delta stuff gets in, I'll use the delta
+	 * algorithm to estimate the similarity score in core.
+	 */
+
+	if (one->sha1_valid && two->sha1_valid &&
+	    !memcmp(one->blob_sha1, two->blob_sha1, 20))
+		return 10000;
+	return 0;
+}
+
+static void flush_renames(const char **spec, int cnt, int reverse)
+{
+	struct diff_spec_hold *rename_src, *rename_dst, *elem;
+	struct diff_spec_hold *leftover = NULL;
+	int score, best_score;
+
+	while (createdfile) {
+		rename_dst = createdfile;
+		createdfile = rename_dst->next;
+		best_score = MINIMUM_SCORE;
+		rename_src = NULL;
+		for (elem = deletedfile;
+		     elem;
+		     elem = elem->next) {
+			if (elem->matched)
+				continue;
+			score = estimate_similarity(&elem->old,
+						    &rename_dst->new);
+			if (best_score < score) {
+				rename_src = elem;
+				best_score = score;
+			}
+		}
+		if (rename_src) {
+			rename_src->matched = rename_dst;
+			rename_dst->matched = rename_src;
+
+			if (!cnt ||
+			    matches_pathspec(rename_src->path, spec, cnt) ||
+			    matches_pathspec(rename_dst->path, spec, cnt)) {
+				if (reverse)
+					run_external_diff(rename_dst->path,
+							  rename_src->path,
+							  &rename_dst->new,
+							  &rename_src->old);
+				else
+					run_external_diff(rename_src->path,
+							  rename_dst->path,
+							  &rename_src->old,
+							  &rename_dst->new);
+			}
+		}
+		else {
+			rename_dst->next = leftover;
+			leftover = rename_dst;
+		}
+	}
+
+	/* unmatched deletes */
+	for (elem = deletedfile; elem; elem = elem->next) {
+		if (elem->matched)
+			continue;
+		if (!cnt ||
+		    matches_pathspec(elem->path, spec, cnt)) {
+			if (reverse)
+				run_external_diff(elem->path, NULL,
+						  &elem->new, &elem->old);
+			else
+				run_external_diff(elem->path, NULL,
+						  &elem->old, &elem->new);
+		}
+	}
+
+	/* unmatched creates */
+	for (elem = leftover; elem; elem = elem->next) {
+		if (!cnt ||
+		    matches_pathspec(elem->path, spec, cnt)) {
+			if (reverse)
+				run_external_diff(elem->path, NULL,
+						  &elem->new, &elem->old);
+			else
+				run_external_diff(elem->path, NULL,
+						  &elem->old, &elem->new);
+		}
+	}
+}
+
 static int parse_oneside_change(const char *cp, struct diff_spec *one,
 				char *path)
 {
@@ -100,17 +223,24 @@
 	default:
 		return -1;
 	}
+
+	if (detect_rename && old.file_valid != new.file_valid) {
+		/* hold these */
+		hold_spec(path, &old, &new);
+		return 0;
+	}
+
 	if (!cnt || matches_pathspec(path, spec, cnt)) {
 		if (reverse)
-			run_external_diff(path, &new, &old);
+			run_external_diff(path, NULL, &new, &old);
 		else
-			run_external_diff(path, &old, &new);
+			run_external_diff(path, NULL, &old, &new);
 	}
 	return 0;
 }
 
 static const char *diff_helper_usage =
-"git-diff-helper [-R] [-z] paths...";
+	"git-diff-helper [-r] [-R] [-z] paths...";
 
 int main(int ac, const char **av) {
 	struct strbuf sb;
@@ -124,6 +254,8 @@
 			reverse = 1;
 		else if (av[1][1] == 'z')
 			line_termination = 0;
+		else if (av[1][1] == 'r')
+			detect_rename = 1;
 		else
 			usage(diff_helper_usage);
 		ac--; av++;
@@ -139,5 +271,8 @@
 		if (status)
 			fprintf(stderr, "cannot parse %s\n", sb.buf);
 	}
+
+	if (detect_rename)
+		flush_renames(av+1, ac-1, reverse);
 	return 0;
 }
diff -git a/diff.c b/diff.c
--- a/diff.c
+++ b/diff.c
@@ -79,7 +79,8 @@
 	char tmp_path[50];
 } diff_temp[2];
 
-static void builtin_diff(const char *name,
+static void builtin_diff(const char *name_a,
+			 const char *name_b,
 			 struct diff_tempfile *temp)
 {
 	int i, next_at;
@@ -88,9 +89,12 @@
 	const char *input_name_sq[2];
 	const char *path0[2];
 	const char *path1[2];
-	const char *name_sq = sq_expand(name);
+	const char *name_sq[2];
 	char *cmd;
-	
+
+	name_sq[0] = sq_expand(name_a);
+	name_sq[1] = sq_expand(name_b);
+
 	/* diff_cmd and diff_arg have 6 %s in total which makes
 	 * the sum of these strings 12 bytes larger than required.
 	 * we use 2 spaces around diff-opts, and we need to count
@@ -105,7 +109,7 @@
 			path1[i] = "";
 		} else {
 			path0[i] = i ? "b/" : "a/";
-			path1[i] = name_sq;
+			path1[i] = name_sq[i];
 		}
 		cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
 			     strlen(input_name_sq[i]));
@@ -122,7 +126,7 @@
 	next_at += snprintf(cmd+next_at, cmd_size-next_at,
 			    diff_arg, input_name_sq[0], input_name_sq[1]);
 
-	printf("diff -git a/%s b/%s\n", name, name);
+	printf("diff -git a/%s b/%s\n", name_a, name_b);
 	if (!path1[0][0])
 		printf("new file mode %s\n", temp[1].mode);
 	else if (!path1[1][0])
@@ -132,6 +136,10 @@
 			printf("old mode %s\n", temp[0].mode);
 			printf("new mode %s\n", temp[1].mode);
 		}
+		if (strcmp(name_a, name_b)) {
+			printf("rename old %s\n", name_a);
+			printf("rename new %s\n", name_b);
+		}
 		if (strncmp(temp[0].mode, temp[1].mode, 3))
 			/* we do not run diff between different kind
 			 * of objects.
@@ -157,7 +165,7 @@
 	 * benchmark with my previous version that always reads cache
 	 * shows that it makes things worse for diff-tree comparing
 	 * two linux-2.6 kernel trees in an already checked out work
-	 * tree.  This is because most diff-tree comparison deals with
+	 * tree.  This is because most diff-tree comparisons deal with
 	 * only a small number of files, while reading the cache is
 	 * expensive for a large project, and its cost outweighs the
 	 * savings we get by not inflating the object to a temporary
@@ -294,6 +302,7 @@
  *
  */
 void run_external_diff(const char *name,
+		       const char *other,
 		       struct diff_spec *one,
 		       struct diff_spec *two)
 {
@@ -304,7 +313,7 @@
 
 	if (one && two) {
 		prepare_temp_file(name, &temp[0], one);
-		prepare_temp_file(name, &temp[1], two);
+		prepare_temp_file(other ? : name, &temp[1], two);
 		if (! atexit_asked &&
 		    (temp[0].name == temp[0].tmp_path ||
 		     temp[1].name == temp[1].tmp_path)) {
@@ -320,7 +329,8 @@
 		die("unable to fork");
 	if (!pid) {
 		const char *pgm = external_diff();
-		if (pgm) {
+		/* not passing rename patch to external ones */
+		if (!other && pgm) {
 			if (one && two)
 				execlp(pgm, pgm,
 				       name,
@@ -334,7 +344,7 @@
 		 * otherwise we use the built-in one.
 		 */
 		if (one && two)
-			builtin_diff(name, temp);
+			builtin_diff(name, other ? : name, temp);
 		else
 			printf("* Unmerged path %s\n", name);
 		exit(0);
@@ -379,7 +389,7 @@
 		strcpy(concatpath, base);
 		strcat(concatpath, path);
 	}
-	run_external_diff(path ? concatpath : base, one, two);
+	run_external_diff(path ? concatpath : base, NULL, one, two);
 }
 
 void diff_change(unsigned old_mode, unsigned new_mode,
@@ -400,10 +410,10 @@
 		strcpy(concatpath, base);
 		strcat(concatpath, path);
 	}
-	run_external_diff(path ? concatpath : base, &spec[0], &spec[1]);
+	run_external_diff(path ? concatpath : base, NULL, &spec[0], &spec[1]);
 }
 
 void diff_unmerge(const char *path)
 {
-	run_external_diff(path, NULL, NULL);
+	run_external_diff(path, NULL, NULL, NULL);
 }
diff -git a/diff.h b/diff.h
--- a/diff.h
+++ b/diff.h
@@ -31,7 +31,7 @@
 	unsigned file_valid : 1; /* if false the file does not even exist */
 };
 
-extern void run_external_diff(const char *name,
+extern void run_external_diff(const char *name, const char *other,
 			      struct diff_spec *, struct diff_spec *);
 
 #endif /* DIFF_H */
------------------------------------------------


^ permalink raw reply

* [PATCH 0/1] Diff-helper update
From: Junio C Hamano @ 2005-05-18  6:28 UTC (permalink / raw)
  To: torvalds; +Cc: git, pasky

This is just a cover letter but the next patch implements the
rename detection I told you about.

The output from the patched version is attached at the end of
this message as a demonstration.

My plan is to get the documentation and the framework in place
with this patch first.  The basic strategy is to hold created
and deleted files while we parse the incoming diff-tree output,
and match them up at the end, looking for usefully similar pairs.

The similarity evaluator included in this round of patch detects
exact renames only, which is not very useful in practice, but
that would be improved in the later round.  It will probably be
done with the same deltify code Nico is using.

$ git-diff-tree -r \
    13ab4462d2aefb252d7c916bd537151856b7c967 \
    99665af5c0be0fe4319b39183e84917993153576 | ./git-diff-helper -r
diff -git a/Documentation/diff-format.txt b/Documentation/diff-format.txt
--- a/Documentation/diff-format.txt
+++ b/Documentation/diff-format.txt
@@ -45,7 +45,7 @@ with a '-p' option, they do not produce 
 instead they produce a patch file.
 ...
diff -git a/diff.h b/diff.h
--- a/diff.h
+++ b/diff.h
@@ -17,7 +17,7 @@ extern void diff_change(unsigned mode1, 

 extern void diff_unmerge(const char *path);

-/* These are for diff-tree-helper */
+/* These are for diff-helper */

 struct diff_spec {
 	unsigned char blob_sha1[20];
diff -git a/diff-tree-helper.c b/diff-helper.c
rename old diff-tree-helper.c
rename new diff-helper.c
diff -git a/Documentation/git-diff-tree-helper.txt b/Documentation/git-diff-helper.txt
rename old Documentation/git-diff-tree-helper.txt
rename new Documentation/git-diff-helper.txt

^ permalink raw reply

* Re: git-rev-list in local commit order
From: Jon Seymour @ 2005-05-18  5:16 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, Sean, git
In-Reply-To: <Pine.LNX.4.58.0505171035570.18337@ppc970.osdl.org>

On 5/18/05, Linus Torvalds <torvalds@osdl.org> wrote:
> 
> 
> On Tue, 17 May 2005, Thomas Gleixner wrote:
> >
> > On Tue, 2005-05-17 at 08:43 -0700, Linus Torvalds wrote:
> > > > My idea of repository id was not the notion of workspace seperation. I
> > > > dont care in which directory and on which machine you or who ever
> > > > commits a line of code. I care where the change appears in a public
> > > > repository, which is unique.
> > >
> > > You seem to think that the repository on master.kernel.org is more
> > > important than the one on my private machine, and you're _wrong_.
> >
> > For me yes, as I have no access to your private ones and I can only rely
> > on the integrity of the public accessible ones.
> >
> > For the individual developer the private workspaces are surely more
> > important. I never doubted that, but I do not care whether you use one
> > or ten workspaces and which one of them you blow away or use for
> > updating of master.kernel.org.
> 
> But how would you track "repositoryness", when the repository you care
> about has absolutely nothing to do with the repositories that any of the
> developers who created it in the first place care about?
> 
> See the problem? You can't. You seem to want to track information that
> simply does not _exist_.
> 
> Put another way: the repository ID of the eventual public "target"
> repository only becomes available once the information has been pushed
> there, not before. So a "commit" cannot contain that information, because
> at commit time, you fundamentally cannot know what the eventual public
> repository (if any) will be.

Earlier in a related thread, I argued that what everyone else has been
calling a repo-id is actually a workspace id. Your GIT_COMMITER_EMAIL
idea would have the same practical effect as a separate workspace id,
though it does pollute the interpretation of the e-mail id's since
they are no longer pure e-mail id's...

Would you be amenable to a patch that allowed tools to put attributes
of the form:

   x-"some-attribute" (' ' [^\0]*)+ '\0'

into a commit header?

If, over time, x-"some-attribute" became universally accepted as
useful, a new release of git could bless it as official and the 'x-'
prefix could be dropped.

In the meantime, tools could experiment with additional commit markers
as they see fit without affecting the interoperability of other tools
which use only the blessed markers.

Of course, a constraint on the semantics of an x-* attribute would
ideally be that its value must be fixed for all time once the commit
happens since there is no way to change it without creating a new
commit.

jon.

^ permalink raw reply

* Re: [PATCH] improved delta support for git
From: Nicolas Pitre @ 2005-05-18  4:32 UTC (permalink / raw)
  To: Dan Holmsand; +Cc: git
In-Reply-To: <d6dohe$dql$1@sea.gmane.org>

On Tue, 17 May 2005, Dan Holmsand wrote:

> I've been trying out your delta stuff as well. It was a bit
> disappointing at first, but some tweaking paid off in the end...

Cool!

My goal is to provide the mechanism that can be used by a higher level 
implementing the deltafication policy.  I only provided one script as an 
example, but it turns out that you found a way to achieve better space 
saving.  And I bet you that there are probably ways to do even better 
with more exhaustive delta targets.  For example you could try all 
possible combinations on an object list for each file (and let it run 
overnight).

> 1) Too many deltas get too big and/or compress badly.

One thing I've been wondering about is whether gzipping small deltas is 
actually a gain.  For very small files it seems that gzip is adding more 
overhead making the compressed file actually larger.  Might be worth 
storing some deltas uncompressed if the compressed version turns out to 
be larger.

> 2) Trees take up a big chunk of total space.

Tree objects can be deltafied as well, but I didn't have time to script 
it.  A large space saving can be expected there as well, especially for 
changesets that modify only a few files deep down the tree hierarchy.

> Therefore, I tried some other approaches. This one seemed to work
> best:
> 
> 1) I limit the maximum size of any delta to 10% of the size of the new
> version. That guarantees a big saving, as long as any delta is
> produced.

Well, any delta object smaller than its original object saves space, 
even if it's 75% of the original size. But...

> 2) If the "previous" version of a blob is a delta, I produce the new
> delta from the old delta's base version. This works surprisingly well.
> I'm guessing the reason for this is that most changes are really
> small, and they tend to be in the same area as a previous change (as
> in "Commit new feature. Commit bugfix for new feature. Commit fix for
> bugfix of new feature. Delete new feature as it doesn't work...").

... but then the ultimate solution is to try out all possible references 
within a given list.  My git-deltafy-script already finds out the list 
of objects belonging to the same file.  Maybe git-mkdelta should try 
all combinations between them.  This way a deeper delta chain could be 
allowed for maximum space saving.

> 3) I use the same method for all tree objects.

Yup.

> Attached is a patch (against current cogito). It is basically the same
> as yours, Nicolas, except for some hackery to make the above possible.
> I'm sure I've made lots of stupid mistakes in it (and the 10% limit is
> hardcoded right now; I'm lazy).

I will look at it and merge the good stuff.

Thanks for testing!

Nicolas

^ permalink raw reply

* Re: gitweb wishlist
From: Kay Sievers @ 2005-05-18  2:55 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git
In-Reply-To: <20050511012626.GL26384@pasky.ji.cz>

On Wed, 2005-05-11 at 03:26 +0200, Petr Baudis wrote:

>   I think I would prefer the link from the repository index to go not to
> the log page, but some "summary" page, which would have some short
> information about the repository (owner, description, list of branches
> if gitweb supports that, list of tags, link to the latest tree and link
> to the log).

I did this now. The top-link shows now the repository listing with a
nice "last change" field. The default link points to an overview page
which also lists the tags.
(The owner field in that list is not correct until now, cause the
cron-job needs to be adapted.)

Thanks,
Kay

^ permalink raw reply

* Re: [PATCH 2/4] Tweak diff output further to make it a bit less distracting.
From: Linus Torvalds @ 2005-05-17 23:32 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Petr Baudis, git
In-Reply-To: <7vsm0lqym3.fsf@assigned-by-dhcp.cox.net>

On Tue, 17 May 2005, Junio C Hamano wrote:
> 
> Now, in the new diff format, if the rename is really a pure
> rename, then we would have:
> 
>      diff -git a/nitfol b/nitfol
>      rename old frotz
>      rename new nitfol
>      diff -git a/rezrov b/rezrov
>      --- a/rezrov
>      +++ b/rezrov
>      @@ ...
> 
> that is, nothing until the patch for the next file or EOF.  Is
> this acceptable?

I think that's exactly what we want. At least it does exactly the right 
thing for me, when I do '/^diff ' in less, with nice highlighting of the 
headers.

With people inevitably adding some nice coloration support in gitweb etc,
it will be outstanding.

		Linus

^ permalink raw reply

* [PATCH] packed delta git
From: Chris Mason @ 2005-05-17 22:57 UTC (permalink / raw)
  To: git, Nicolas Pitre

Hello everyone,

Here's a new version of my packed git patch, diffed on top of Nicolas' 
delta code (link below to that).  It doesn't change the core git commands
to create packed/delta files, that is done via a new git-pack command.  The
git-pack usage is very simple:

git-pack [<reference_sha1>:]<target_sha1> [ <next_sha1> ... ]

If you use the ref:target notation, it will create a delta between those 
two objects and relink the target into the resulting packed file.  If you
don't provide a reference sha1, the sha1 file is relinked into the packed file.

A script is provided (git-pack-changes-script) that walks the rev-list output 
and puts your whole repository into packed/delta form.  This is just
a starting point, it needs more knobs for forward/reverse deltas,
max depth etc.  The script does delta tree files and packs both trees and 
commits in with the blobs.

git-diff-tree -t was added so that it will show the sha1 of subtrees that
differ while it processes things.

There is no way to unpack a file yet, so please use these with caution.

I did tests against the current 2.6 git tree (ext3)

                                      vanilla              delta+pack
.git size                          191M                62M
checkout-cache (cold)     2m13s              38.9s
checkout-cache (hot)      8s  (2s user)      14.9s (11.46s user)

2.6.11 without any changesets:
                                      unpacked          packed
.git size                           91M                  55M

Because the 2.6 kernel repo has 2.6.11 floating around in there as a tree
with no commit, you have to do a few steps in order to pack things in. 

# step one, pack all the files in 2.6.11 together
git-ls-tree -r v2.6.11 | awk '{print $3}' | xargs git-pack

# step two, make packed deltas between trees for 2.6.11 and 2.6.12-rc2
# (git-pack-changes-script -t only works with trees, not commits/tags)
git-pack-changes-script -t c39ae07f393806ccf406ef966e9a15afc43cc36a 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 | xargs git-pack

# step three, use git-pack-changes to walk whole rev-list and pack/delta
# everything else
git-pack-changes-script | xargs git-pack

And finally, here's the patch.  It is on top of Nicolas' code, which you
can grab here:

http://marc.theaimsgroup.com/?l=git&m=111587004902021&w=2

Signed-off-by: Chris Mason <mason@suse.com>
--

diff -urN linus.delta/cache.h linus/cache.h
--- linus.delta/cache.h	2005-05-17 16:51:51.410686192 -0400
+++ linus/cache.h	2005-05-17 15:17:16.015477408 -0400
@@ -77,6 +77,16 @@
 	char name[0];
 };
 
+struct packed_item {
+	/* length of compressed data */
+	unsigned long len;
+	struct packed_item *next;
+	/* sha1 of uncompressed data */
+	char sha1[20];
+	/* compressed data */
+	char *data;
+};
+
 #define CE_NAMEMASK  (0x0fff)
 #define CE_STAGEMASK (0x3000)
 #define CE_STAGESHIFT 12
@@ -135,8 +145,10 @@
 
 /* Read and unpack a sha1 file into memory, write memory to a sha1 file */
 extern void * map_sha1_file(const unsigned char *sha1, unsigned long *size);
-extern void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
+extern void * unpack_sha1_file(const unsigned char *sha1, void *map, unsigned long mapsize, char *type, unsigned long *size, const unsigned char *recur_sha1, int *chain);
+extern void * raw_unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size);
 extern void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size);
+extern void * read_sha1_delta_ref(const unsigned char *sha1, char *type, unsigned long *size, const unsigned char *, int *chain);
 extern int write_sha1_file(char *buf, unsigned long len, const char *type, unsigned char *return_sha1);
 
 extern int check_sha1_signature(unsigned char *sha1, void *buf, unsigned long size, const char *type);
@@ -152,6 +164,10 @@
 extern int get_sha1(const char *str, unsigned char *sha1);
 extern int get_sha1_hex(const char *hex, unsigned char *sha1);
 extern char *sha1_to_hex(const unsigned char *sha1);	/* static buffer result! */
+extern int pack_sha1_buffer(void *buf, unsigned long buf_len, char *type,
+                            unsigned char *returnsha1, unsigned char *refsha1, 
+			    struct packed_item **, int max_depth);
+int write_packed_list(struct packed_item *head);
 
 /* General helper functions */
 extern void usage(const char *err);
Files linus.delta/.cache.h.swp and linus/.cache.h.swp differ
diff -urN linus.delta/diff-tree.c linus/diff-tree.c
--- linus.delta/diff-tree.c	2005-05-17 16:51:51.413685736 -0400
+++ linus/diff-tree.c	2005-05-16 20:08:41.000000000 -0400
@@ -5,6 +5,7 @@
 static int silent = 0;
 static int verbose_header = 0;
 static int ignore_merges = 1;
+static int show_tree_diffs = 0;
 static int recursive = 0;
 static int read_stdin = 0;
 static int line_termination = '\n';
@@ -114,6 +115,7 @@
 	const unsigned char *sha1, *sha2;
 	int cmp, pathlen1, pathlen2;
 	char old_sha1_hex[50];
+	int retval = 0;
 
 	sha1 = extract(tree1, size1, &path1, &mode1);
 	sha2 = extract(tree2, size2, &path2, &mode2);
@@ -143,11 +145,11 @@
 	}
 
 	if (recursive && S_ISDIR(mode1)) {
-		int retval;
 		char *newbase = malloc_base(base, path1, pathlen1);
 		retval = diff_tree_sha1(sha1, sha2, newbase);
 		free(newbase);
-		return retval;
+		if (!show_tree_diffs)
+			return retval;
 	}
 
 	if (header) {
@@ -155,7 +157,7 @@
 		header = NULL;
 	}
 	if (silent)
-		return 0;
+		return retval;
 
 	if (generate_patch) {
 		if (!S_ISDIR(mode1))
@@ -168,7 +170,7 @@
 		       old_sha1_hex, sha1_to_hex(sha2), base, path1,
 		       line_termination);
 	}
-	return 0;
+	return retval;
 }
 
 static int interesting(void *tree, unsigned long size, const char *base)
@@ -387,7 +389,7 @@
 }
 
 static char *diff_tree_usage =
-"diff-tree [-p] [-r] [-z] [--stdin] [-m] [-s] [-v] <tree sha1> <tree sha1>";
+"diff-tree [-p] [-r] [-z] [--stdin] [-m] [-s] [-v] [-t] <tree sha1> <tree sha1>";
 
 int main(int argc, char **argv)
 {
@@ -428,6 +430,10 @@
 			silent = 1;
 			continue;
 		}
+		if (!strcmp(arg, "-t")) {
+			show_tree_diffs = 1;
+			continue;
+		}
 		if (!strcmp(arg, "-v")) {
 			verbose_header = 1;
 			header_prefix = "diff-tree ";
diff -urN linus.delta/git-pack-changes-script linus/git-pack-changes-script
--- linus.delta/git-pack-changes-script	1969-12-31 19:00:00.000000000 -0500
+++ linus/git-pack-changes-script	2005-05-17 16:57:44.768967568 -0400
@@ -0,0 +1,161 @@
+#!/usr/bin/perl
+#
+# script to search through the rev-list output and generate delta history
+# you can specify either a start and stop commit or two trees to search.
+# with no command line args it searches the entire revision history.
+# output is suitable for piping to xargs git-pack
+
+use strict;
+
+my $ret;
+my $i;
+my @wanted = ();
+my $argc = scalar(@ARGV);
+my $commit;
+my $stop;
+my %delta = ();
+
+sub add_delta($$) {
+    my ($ref, $target) = @_;
+    if (defined($delta{$target})) {
+        return;
+    }
+    if ($target eq $delta{$ref}) {
+	print $ref;
+	return 1;
+    }
+    $delta{$target} = $ref;
+    print "$ref:$target\n";
+
+}
+sub print_usage() {
+    print STDERR "usage: pack-changes [-c commit] [-s stop commit] [-t tree1 tree2]\n";
+    exit(1);
+}
+
+sub find_tree($) {
+    my ($commit) = @_;
+    open(CM, "git-cat-file commit $commit|") || die "git-cat-file failed";
+    while(<CM>) {
+        chomp;
+	my @words = split;
+	if ($words[0] eq "tree") {
+	    return $words[1];
+	} elsif ($words[0] ne "parent") {
+	    last;
+	}
+    }
+    close(CM);
+    if ($? && ($ret = $? >> 8)) {
+        die "cat-file $commit failed with $ret";
+    }
+    return undef;
+}
+
+sub test_diff($$) {
+    my ($a, $b) = @_;
+    open(DT, "git-diff-tree -r -t $a $b|") || die "diff-tree failed";
+    while(<DT>) {
+        chomp;
+	my @words = split;
+	my $sha1 = $words[2];
+	my $change = $words[0];
+	if ($change =~ m/^\*/) {
+	    @words = split("->", $sha1);
+	    add_delta($words[0], $words[1]);
+	} elsif ($change =~ m/^\-/) {
+	    next;
+	} else {
+	    print "$sha1\n";
+	}
+    }
+    close(DT);
+    if ($? && ($ret = $? >> 8)) {
+	die "git-diff-tree failed with $ret";
+    }
+    return 0;
+}
+
+for ($i = 0 ; $i < $argc ; $i++)  {
+    if ($ARGV[$i] eq "-c") {
+    	if ($i == $argc - 1) {
+	    print_usage();
+	}
+	$commit = $ARGV[++$i];
+    } elsif ($ARGV[$i] eq "-s") {
+    	if ($i == $argc - 1) {
+	    print_usage();
+	}
+	$stop = $ARGV[++$i];
+    } elsif ($ARGV[$i] eq "-t") {
+        if ($argc != 3 || $i != 0) {
+	    print_usage();
+	}
+	if (test_diff($ARGV[1], $ARGV[2])) {
+	    die "test_diff failed\n";
+	}
+	add_delta($ARGV[1], $ARGV[2]);
+	exit(0);
+    }
+}
+
+if (!defined($commit)) {
+    $commit = `commit-id`;
+    if ($?) {
+    	print STDERR "commit-id failed, try using -c to specify a commit\n";
+	exit(1);
+    }
+    chomp $commit;
+}
+
+open(RL, "git-rev-list $commit|") || die "rev-list failed";
+while(<RL>) {
+    chomp;
+    my $cur = $_;
+    my $cur_tree;
+    my $parent_tree;
+    my $parent_commit = undef;
+    open(PARENT, "git-cat-file commit $cur|") || die "cat-file failed";
+    while(<PARENT>) {
+        chomp;
+	my @words = split;
+	if ($words[0] eq "tree") {
+	    $cur_tree = $words[1];
+	    next;
+	} elsif ($words[0] ne "parent") {
+	    last;
+	}
+	$parent_commit = $words[1];
+	my $next = <PARENT>;
+	# ignore merge sets for now
+	if ($next =~ m/^parent/) {
+	    last;
+	}
+	if (test_diff($words[1], $cur)) {
+	    die "test_diff failed\n";
+	}
+	$parent_tree = find_tree($words[1]);
+	if (!defined($parent_tree)) {
+	    die "failed to find tree for $words[1]\n";
+	}
+	add_delta($parent_tree, $cur_tree);
+	print "$cur\n";
+	last;
+    }
+    close(PARENT);
+    if (!defined($parent_commit)) {
+        print STDERR "parentless commit $cur\n";
+    }
+    if ($? && ($ret = $? >> 8)) {
+        die "cat-file failed with $ret";
+    }
+    if ($cur eq $stop) {
+        last;
+    }
+}
+close(RL);
+
+if ($? && ($ret = $? >> 8)) {
+    die "rev-list failed with $ret";
+}
+
diff -urN linus.delta/Makefile linus/Makefile
--- linus.delta/Makefile	2005-05-17 16:52:56.698760896 -0400
+++ linus/Makefile	2005-05-17 16:50:48.335275112 -0400
@@ -22,7 +22,7 @@
 	git-unpack-file git-export git-diff-cache git-convert-cache \
 	git-http-pull git-rpush git-rpull git-rev-list git-mktag \
 	git-diff-tree-helper git-tar-tree git-local-pull git-write-blob \
-	git-mkdelta
+	git-mkdelta git-pack
 
 all: $(PROG)
 
diff -urN linus.delta/mkdelta.c linus/mkdelta.c
--- linus.delta/mkdelta.c	2005-05-17 16:52:56.700760592 -0400
+++ linus/mkdelta.c	2005-05-16 17:21:37.000000000 -0400
@@ -95,7 +95,7 @@
 	unsigned long mapsize;
 	void *map = map_sha1_file(sha1, &mapsize);
 	if (map) {
-		void *buffer = unpack_sha1_file(map, mapsize, type, size);
+		void *buffer = raw_unpack_sha1_file(map, mapsize, type, size);
 		munmap(map, mapsize);
 		if (buffer)
 			return buffer;
diff -urN linus.delta/object.c linus/object.c
--- linus.delta/object.c	2005-05-17 16:51:51.419684824 -0400
+++ linus/object.c	2005-05-17 15:16:52.291084064 -0400
@@ -107,7 +107,7 @@
 		struct object *obj;
 		char type[100];
 		unsigned long size;
-		void *buffer = unpack_sha1_file(map, mapsize, type, &size);
+		void *buffer = unpack_sha1_file(sha1, map, mapsize, type, &size, NULL, NULL);
 		munmap(map, mapsize);
 		if (!buffer)
 			return NULL;
diff -urN linus.delta/pack.c linus/pack.c
--- linus.delta/pack.c	1969-12-31 19:00:00.000000000 -0500
+++ linus/pack.c	2005-05-17 17:13:44.615048784 -0400
@@ -0,0 +1,122 @@
+/*
+ * pack and delta files in a GIT database
+ * (C) 2005 Chris Mason <mason@suse.com>
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include "cache.h"
+#include "delta.h"
+
+static char *pack_usage = "pack [ --max-depth=N ] [<reference_sha1>:]<target_sha1> [ <next_sha1> ... ]";
+
+static int pack_sha1(unsigned char *sha1, unsigned char *refsha1, 
+		     struct packed_item **head, struct packed_item **tail, 
+		     unsigned long *packed_size, unsigned long *packed_nr, 
+		     int max_depth)
+{
+	struct packed_item *item;
+	char *buffer;
+	unsigned long size;
+	int ret;
+	char type[20];
+	unsigned char retsha1[20];
+
+	buffer = read_sha1_file(sha1, type, &size);
+	if (!buffer) {
+		fprintf(stderr, "failed to read %s\n", sha1_to_hex(sha1));
+		return -1;
+	}
+	ret = pack_sha1_buffer(buffer, size, type, retsha1, refsha1, &item, max_depth);
+	free(buffer);
+	if (memcmp(sha1, retsha1, 20)) {
+		fprintf(stderr, "retsha1 %s ", sha1_to_hex(retsha1));
+		fprintf(stderr, "sha1 %s\n", sha1_to_hex(sha1));
+		return -1;
+	}
+	if (memcmp(item->sha1, sha1, 20)) {
+		fprintf(stderr, "item sha1 %s ", sha1_to_hex(item->sha1));
+		fprintf(stderr, "sha1 %s\n", sha1_to_hex(sha1));
+		return -1;
+	}
+	if (ret)
+		return ret;
+	if (item) {
+		if (*tail)
+			(*tail)->next = item;
+		*tail = item;
+		if (!*head)
+			*head = item;
+		*packed_size += item->len;
+		(*packed_nr)++;
+		if (*packed_size > (512 * 1024) || *packed_nr > 1024) {
+			ret = write_packed_list(*head);
+			if (ret)
+				return ret;
+			*head = NULL;
+			*tail = NULL;
+			*packed_size = 0;
+			*packed_nr = 0;
+		}
+	}
+	return 0;
+}
+int main(int argc, char **argv)
+{
+	int i;
+	struct packed_item *head = NULL;
+	struct packed_item *tail = NULL;
+	unsigned long packed_size = 0;
+	unsigned long packed_nr = 0;
+	int verbose;
+	int depth_max = 16;
+	int ret;
+
+	for (i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "-v")) {
+			verbose = 1;
+		} else if (!strcmp(argv[i], "-d") && i+1 < argc) {
+			depth_max = atoi(argv[++i]);
+		} else if (!strncmp(argv[i], "--max-depth=", 12)) {
+			depth_max = atoi(argv[i]+12);
+		} else
+			break;
+	}
+	if (i == argc)
+		usage(pack_usage);
+	while(i < argc) {
+		unsigned char sha1[20];
+		unsigned char refsha1[20];
+		unsigned char *target;
+		unsigned char *ref = NULL;
+		target = strchr(argv[i], ':');
+		if (target) {
+			*target = '\0';
+			target++;
+			ref = argv[i];
+		} else {
+			target = argv[i];
+		}
+		if (get_sha1_hex(target, sha1)) {
+			fprintf(stderr, "unable to parse sha1 %s\n", argv[i]);
+			exit(1);
+		}
+		if (ref) {
+			if (get_sha1_hex(ref, refsha1)) {
+				fprintf(stderr, "unable to parse sha1 %s\n", argv[i]);
+				exit(1);
+			}
+			ref = refsha1;
+		}
+		ret = pack_sha1(sha1, ref, &head, &tail, &packed_size, &packed_nr, depth_max);
+		if (ret) {
+			fprintf(stderr, "pack_sha1 failed! %d\n", ret);
+			exit(1);
+		}
+		i++;
+	}
+
+	if (head)
+		write_packed_list(head);
+	return 0;
+}
diff -urN linus.delta/sha1_file.c linus/sha1_file.c
--- linus.delta/sha1_file.c	2005-05-17 16:52:56.697761048 -0400
+++ linus/sha1_file.c	2005-05-17 18:16:54.004973952 -0400
@@ -180,39 +180,132 @@
 	return map;
 }
 
-void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size)
+/*
+ * looks through buf for the header entry corresponding to sha1.  returns
+ * 0 if an entry is found and sets offset to the offset of the packed item
+ * in the file.  The offset is relative to the start of the packed items
+ * so you have to add in the length of the header before using it.
+ * -1 is returned if the sha1 could not be found
+ */
+static int find_packed_header(const unsigned char *sha1, char *buf, unsigned long buf_len, unsigned long *offset)
+{
+	char *p;
+	p = buf;
+
+	*offset = 0;
+	while(p < buf + buf_len) {
+		unsigned long item_len;
+		unsigned char item_sha[20];
+		memcpy(item_sha, p, 20);
+		sscanf(p + 20, "%lu", &item_len);
+		p += 20 + strlen(p + 20) + 1;
+		if (memcmp(item_sha, sha1, 20) == 0)
+			return 0;
+		*offset += item_len;
+	}
+	return -1;
+}
+
+
+/*
+ * uncompresses a data segment without any extra delta/packed processing
+ */
+static void * _unpack_sha1_file(z_stream *stream, void *map, 
+                                unsigned long mapsize, char *type, 
+				unsigned long *size)
 {
 	int ret, bytes;
-	z_stream stream;
 	char buffer[8192];
 	char *buf;
 
 	/* Get the data stream */
-	memset(&stream, 0, sizeof(stream));
-	stream.next_in = map;
-	stream.avail_in = mapsize;
-	stream.next_out = buffer;
-	stream.avail_out = sizeof(buffer);
-
-	inflateInit(&stream);
-	ret = inflate(&stream, 0);
-	if (ret < Z_OK)
+	memset(stream, 0, sizeof(*stream));
+	stream->next_in = map;
+	stream->avail_in = mapsize;
+	stream->next_out = buffer;
+	stream->avail_out = sizeof(buffer);
+
+	inflateInit(stream);
+	ret = inflate(stream, 0);
+	if (ret < Z_OK) {
 		return NULL;
-	if (sscanf(buffer, "%10s %lu", type, size) != 2)
+	}
+	if (sscanf(buffer, "%10s %lu", type, size) != 2) {
 		return NULL;
-
+	}
 	bytes = strlen(buffer) + 1;
 	buf = xmalloc(*size);
 
-	memcpy(buf, buffer + bytes, stream.total_out - bytes);
-	bytes = stream.total_out - bytes;
+	memcpy(buf, buffer + bytes, stream->total_out - bytes);
+	bytes = stream->total_out - bytes;
 	if (bytes < *size && ret == Z_OK) {
-		stream.next_out = buf + bytes;
-		stream.avail_out = *size - bytes;
-		while (inflate(&stream, Z_FINISH) == Z_OK)
+		stream->next_out = buf + bytes;
+		stream->avail_out = *size - bytes;
+		while (inflate(stream, Z_FINISH) == Z_OK)
 			/* nothing */;
 	}
-	inflateEnd(&stream);
+	inflateEnd(stream);
+	return buf;
+}
+
+void * raw_unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned long *size)
+{
+	z_stream stream;
+	return _unpack_sha1_file(&stream, map, mapsize, type, size);
+}
+
+void * unpack_sha1_file(const unsigned char *sha1, void *map, 
+			unsigned long mapsize, char *type, unsigned long *size, 
+			const unsigned char *recur_sha1,
+			int *chain)
+{
+	z_stream stream;
+	char *buf;
+	unsigned long offset;
+	unsigned long header_len;
+	buf = _unpack_sha1_file(&stream, map, mapsize, type, size);
+	if (!buf)
+		return buf;
+	if (!strcmp(type, "delta")) {
+		char *delta_ref;
+		unsigned long delta_size;
+		char *newbuf;
+		unsigned long newsize;
+		if (recur_sha1 && memcmp(buf, recur_sha1, 20) == 0) {
+			free(buf);
+			return NULL;
+		}
+		if (chain)
+			*chain += 1;
+		delta_ref = read_sha1_delta_ref(buf, type, &delta_size, recur_sha1, chain);
+		if (!delta_ref) {
+			fprintf(stderr, "failed to read delta %s\n", sha1_to_hex(buf));
+			free(buf);
+			return NULL;
+		}
+		newbuf = patch_delta(delta_ref, delta_size, buf+20, *size-20, &newsize);
+		if (!newbuf) {
+			fprintf(stderr, "patch_delta failed %s %lu\n", sha1_to_hex(buf), delta_size);
+		}
+		free(buf);
+		free(delta_ref);
+		*size = newsize;
+		return newbuf;
+
+	} else if (!strcmp(type, "packed")) {
+		if (!sha1) {
+			free(buf);
+			return NULL;
+		}
+		header_len = *size;
+		if (find_packed_header(sha1, buf, header_len, &offset)) {
+			free(buf);
+			return NULL;
+		}
+		offset += stream.total_in;
+		free(buf);
+		buf = unpack_sha1_file(sha1, map+offset, mapsize-offset, type, size, recur_sha1, chain);
+	}
 	return buf;
 }
 
@@ -223,21 +316,26 @@
 
 	map = map_sha1_file(sha1, &mapsize);
 	if (map) {
-		buf = unpack_sha1_file(map, mapsize, type, size);
+		buf = unpack_sha1_file(sha1, map, mapsize, type, size, NULL, NULL);
+		munmap(map, mapsize);
+		return buf;
+	}
+	return NULL;
+}
+
+/*
+ * the same as read_sha1_file except chain is used to count the length
+ * of any delta chains hit while unpacking
+ */
+void * read_sha1_delta_ref(const unsigned char *sha1, char *type, unsigned long *size, const unsigned char *recur_sha1, int *chain)
+{
+	unsigned long mapsize;
+	void *map, *buf;
+
+	map = map_sha1_file(sha1, &mapsize);
+	if (map) {
+		buf = unpack_sha1_file(sha1, map, mapsize, type, size, recur_sha1, chain);
 		munmap(map, mapsize);
-		if (buf && !strcmp(type, "delta")) {
-			void *ref = NULL, *delta = buf;
-			unsigned long ref_size, delta_size = *size;
-			buf = NULL;
-			if (delta_size > 20)
-				ref = read_sha1_file(delta, type, &ref_size);
-			if (ref)
-				buf = patch_delta(ref, ref_size,
-						  delta+20, delta_size-20, 
-						  size);
-			free(delta);
-			free(ref);
-		}
 		return buf;
 	}
 	return NULL;
@@ -482,3 +580,322 @@
 		munmap(buf, size);
 	return ret;
 }
+
+static void *compress_buffer(void *buf, unsigned long buf_len, char *metadata, 
+                             int metadata_size, unsigned long *compsize)
+{
+	char *compressed;
+	z_stream stream;
+	unsigned long size;
+
+	/* Set it up */
+	memset(&stream, 0, sizeof(stream));
+	size = deflateBound(&stream, buf_len + metadata_size);
+	compressed = xmalloc(size);
+
+	/*
+	 * ASCII size + nul byte
+	 */	
+	stream.next_in = metadata;
+	stream.avail_in = metadata_size;
+	stream.next_out = compressed;
+	stream.avail_out = size;
+	deflateInit(&stream, Z_BEST_COMPRESSION);
+	while (deflate(&stream, 0) == Z_OK)
+		/* nothing */;
+
+	stream.next_in = buf;
+	stream.avail_in = buf_len;
+	/* Compress it */
+	while (deflate(&stream, Z_FINISH) == Z_OK)
+		/* nothing */;
+	deflateEnd(&stream);
+	size = stream.total_out;
+	*compsize = size;
+	return compressed;
+}
+
+/*
+ * generates a delta for buf against refsha1 and returns a compressed buffer
+ * with the results.  NULL is returned on error, or when the delta could
+ * not be done.  This might happen if the delta is larger than either the
+ * refsha1 or the buffer, or the delta chain is too long.
+ */
+void *delta_buffer(void *buf, unsigned long buf_len, char *metadata, 
+                   int metadata_size, unsigned long *compsize, 
+		   unsigned char *sha1, unsigned char *refsha1, int max_chain)
+{
+	char *compressed;
+	char *refbuffer = NULL;
+	char reftype[20];
+	unsigned long refsize = 0;
+	char *delta;
+	unsigned long delta_size;
+	char *lmetadata = xmalloc(220);
+	unsigned long lmetadata_size;
+	int chain_length = 0;
+
+	if (buf_len == 0)
+		return NULL;
+	refbuffer = read_sha1_delta_ref(refsha1, reftype, &refsize, sha1, &chain_length);
+
+	if (chain_length > max_chain) {
+		free(refbuffer);
+		return NULL;
+	}
+	/* note, we could just continue without the delta here */
+	if (!refbuffer) {
+		free(refbuffer);
+		return NULL;
+	}
+	delta = diff_delta(refbuffer, refsize, buf, buf_len, &delta_size);
+	free(refbuffer);
+	if (!delta)
+		return NULL;
+	if (delta_size > refsize || delta_size > buf_len) {
+		free(delta);
+		return NULL;
+	}
+	if (delta_size < 10) {
+		free(delta);
+		return NULL;
+	}
+	lmetadata_size = 1 + sprintf(lmetadata, "%s %lu","delta",delta_size+20);
+	memcpy(lmetadata + lmetadata_size, refsha1, 20);
+	lmetadata_size += 20;
+	compressed = compress_buffer(delta, delta_size, lmetadata, lmetadata_size, compsize);
+	free(lmetadata);
+	free(delta);
+	return compressed;
+}
+
+/*
+ * returns a newly malloc'd packed item with a compressed buffer for buf.  
+ * If refsha1 is non-null, attempts a delta against it.  The sha1 of buf 
+ * is returned via returnsha1.
+ */
+int pack_sha1_buffer(void *buf, unsigned long buf_len, char *type,
+		     unsigned char *returnsha1,
+		     unsigned char *refsha1,
+		     struct packed_item **packed_item, int max_depth)
+{
+	unsigned char sha1[20];
+	SHA_CTX c;
+	char *compressed = NULL;
+	unsigned long size;
+	struct packed_item *item;
+	char *metadata = xmalloc(200);
+	int metadata_size;
+
+	*packed_item = NULL;
+
+	metadata_size = 1 + sprintf(metadata, "%s %lu", type, buf_len);
+
+	/* Sha1.. */
+	SHA1_Init(&c);
+	SHA1_Update(&c, metadata, metadata_size);
+	SHA1_Update(&c, buf, buf_len);
+	SHA1_Final(sha1, &c);
+
+	if (returnsha1)
+		memcpy(returnsha1, sha1, 20);
+
+	if (refsha1)
+		compressed = delta_buffer(buf, buf_len, metadata, 
+		                          metadata_size, &size, sha1, 
+					  refsha1, max_depth);
+	if (!compressed)
+		compressed = compress_buffer(buf, buf_len, metadata, 
+		                             metadata_size, &size);
+	free(metadata);
+	if (!compressed)
+		return -1;
+
+	item = xmalloc(sizeof(struct packed_item));
+	memcpy(item->sha1, sha1, 20);
+	item->len = size;
+	item->next = NULL;
+	item->data = compressed;
+	*packed_item = item;
+	return 0;
+}
+
+static char *create_packed_header(struct packed_item *head, unsigned long *size)
+{
+	char *metadata = NULL;
+	int metadata_size = 0;
+	*size = 0;
+	int entry_size = 0;
+
+	while(head) {
+		char *p;
+		metadata = realloc(metadata, metadata_size + 220);
+		if (!metadata)
+			return NULL;
+		p = metadata+metadata_size;
+		memcpy(p, head->sha1, 20);
+		p += 20;
+		entry_size = 1 + sprintf(p, "%lu", head->len);
+		metadata_size += entry_size + 20;
+		head = head->next;
+	}
+	*size = metadata_size;
+	return metadata;
+}
+
+#define WRITE_BUFFER_SIZE 8192
+static char write_buffer[WRITE_BUFFER_SIZE];
+static unsigned long write_buffer_len;
+
+static int c_write(int fd, void *data, unsigned int len)
+{
+	while (len) {
+		unsigned int buffered = write_buffer_len;
+		unsigned int partial = WRITE_BUFFER_SIZE - buffered;
+		if (partial > len)
+			partial = len;
+		memcpy(write_buffer + buffered, data, partial);
+		buffered += partial;
+		if (buffered == WRITE_BUFFER_SIZE) {
+			if (write(fd, write_buffer, WRITE_BUFFER_SIZE) != WRITE_BUFFER_SIZE)
+				return -1;
+			buffered = 0;
+		}
+		write_buffer_len = buffered;
+		len -= partial;
+		data += partial;
+ 	}
+ 	return 0;
+}
+
+static int c_flush(int fd)
+{
+	if (write_buffer_len) {
+		int left = write_buffer_len;
+		if (write(fd, write_buffer, left) != left)
+			return -1;
+		write_buffer_len = 0;
+	}
+	return 0;
+}
+
+/*
+ * creates a new packed file for all the items in head.  hard links are
+ * made from the sha1 of all the items back to the packed file, and then
+ * the packed file is unlinked.
+ */
+int write_packed_list(struct packed_item *head)
+{
+	unsigned char sha1[20];
+	SHA_CTX c;
+	char filename[PATH_MAX];
+	char *metadata = xmalloc(200);
+	char *header;
+	int metadata_size;
+	int fd;
+	int ret = 0;
+	unsigned long header_len;
+	struct packed_item *item;
+	char *compressed;
+	z_stream stream;
+	unsigned long size;
+
+	header = create_packed_header(head, &header_len);
+	metadata_size = 1+sprintf(metadata, "packed %lu", header_len);
+	/* 
+	 * the header contains the sha1 of each item, so we only sha1 the
+	 * header
+	 */ 
+	SHA1_Init(&c);
+	SHA1_Update(&c, metadata, metadata_size);
+	SHA1_Update(&c, header, header_len);
+	SHA1_Final(sha1, &c);
+
+	if (access(sha1_file_name(sha1), F_OK) == 0)
+		goto out_nofile;
+
+	snprintf(filename, sizeof(filename), "%s/obj_XXXXXX", get_object_directory());
+	fd = mkstemp(filename);
+	if (fd < 0) {
+		ret = -errno;
+		goto out_nofile;
+	}
+
+       /* compress just the header info */
+        memset(&stream, 0, sizeof(stream));
+        deflateInit(&stream, Z_BEST_COMPRESSION);
+	size = deflateBound(&stream, header_len + metadata_size);
+        compressed = xmalloc(size);
+
+        stream.next_in = metadata;
+        stream.avail_in = metadata_size;
+        stream.next_out = compressed;
+        stream.avail_out = size;
+        while (deflate(&stream, 0) == Z_OK)
+                /* nothing */;
+        stream.next_in = header;
+        stream.avail_in = header_len;
+        while (deflate(&stream, Z_FINISH) == Z_OK)
+                /* nothing */;
+        deflateEnd(&stream);
+        size = stream.total_out;
+
+	c_write(fd, compressed, size);
+	free(compressed);
+
+	item = head;
+	while(item) {
+		if (c_write(fd, item->data, item->len)) {
+			ret = -EIO;
+			goto out;
+		}
+		item = item->next;
+	}
+	if (c_flush(fd)) {
+		ret = -EIO;
+		goto out;
+	}
+	item = head;
+	while(item) {
+		char *item_file;
+		char item_tmp[PATH_MAX];
+		struct packed_item *next = item->next;
+		int name_iter = 0;
+		item_file = sha1_file_name(item->sha1);
+		while(1) {
+			/* ugly stuff.  We want to atomically replace any old objects
+			 * with the same sha1, making sure they don't get deleted
+			 * if any step along the way fails
+			 */
+			snprintf(item_tmp, sizeof(item_tmp), "%s/obj_%d", get_object_directory(), name_iter);
+			if (link(filename, item_tmp)) {
+				if (errno != EEXIST) {
+					ret = -errno;
+					goto out;
+				}
+			} else {
+				/* link success */
+				if (rename(item_tmp, item_file)) {
+					ret = -errno;
+					goto out;
+				}
+				break;
+			}
+			if (name_iter++ > 1000) {
+				ret = -1;
+				goto out;
+			}
+		}
+		free(item->data);
+		free(item);
+		item = next;
+	}
+out:
+	unlink(filename);
+	fchmod(fd, 0444);
+	close(fd);
+out_nofile:
+	free(header);
+	free(metadata);
+	return ret;
+}

^ permalink raw reply

* Re: [PATCH] improved delta support for git
From: Dan Holmsand @ 2005-05-17 21:43 UTC (permalink / raw)
  To: git
In-Reply-To: <Pine.LNX.4.62.0505112309480.5426@localhost.localdomain>

[-- Attachment #1: Type: text/plain, Size: 3856 bytes --]


Nicolas (and others),

I've been trying out your delta stuff as well. It was a bit
disappointing at first, but some tweaking paid off in the end...

First, I tried the entire bkcvs history for 2.6, but storing only the
"fs" directory tree in git (hoping that would be representative
enough, since the entire tree gets *big*). I got 4678 commits.

In its original form, it looks like this (the first size is the "network
size", the last one is the disk size on ext3; "avg" is the average size
per object in bytes):

trees:  16M  (15684 files)  avg: 1119, disk:  61M
blobs: 121M  (17200 files)  avg: 7414, disk: 157M
Total: 139M  (37562 files)  avg: 3883, disk: 237M

Using your code, with unlimited delta depth:

trees:  16M  (15684 files)  avg: 1119, disk:  61M
blobs:   9M   (2333 files)  avg: 4491, disk:  15M
deltas: 30M  (14867 files)  avg: 2147, disk:  71M
Total:  83M  (37562 files)  avg: 2334, disk: 188M

Same thing, with a maximum delta depth of 2:

trees:  16M  (15684 files)  avg: 1119, disk:  61M
blobs:  45M   (6940 files)  avg: 6906, disk:  60M
deltas: 20M  (10260 files)  avg: 2086, disk:  48M
Total:  83M  (37562 files)  avg: 2334, disk: 188M

So, total size from a network perspective went from 139M to 83M, which
seemed a little disappointing to me.

I think there are two reasons, as shown by these statistics:

1) Too many deltas get too big and/or compress badly.

2) Trees take up a big chunk of total space.


Therefore, I tried some other approaches. This one seemed to work
best:

1) I limit the maximum size of any delta to 10% of the size of the new
version. That guarantees a big saving, as long as any delta is
produced.

2) If the "previous" version of a blob is a delta, I produce the new
delta from the old delta's base version. This works surprisingly well.
I'm guessing the reason for this is that most changes are really
small, and they tend to be in the same area as a previous change (as
in "Commit new feature. Commit bugfix for new feature. Commit fix for
bugfix of new feature. Delete new feature as it doesn't work...").

3) I use the same method for all tree objects.

This method of "opportunistic delta compression" has some other
advantages: No risk of long delta chains (as the maximum delta depth
is one). It should be disk cache friendly, as many deltas are produced
against the same base version. And this method could easily be used
incrementally, or "on the fly", as forward deltas are used.

Using these tweaks helped a lot, size wise:

trees:   3M   (9746 files)  avg:  380, disk:  38M
blobs:  13M   (3301 files)  avg: 4208, disk:  20M
deltas: 11M  (19837 files)  avg:  586, disk:  78M
Total:  28M  (37562 files)  avg:  799, disk: 155M

As this method turned 139M worth of git repository into 28M, I decided
to try the same method on the entire bkcvs history (28203 commits).

Plain vanilla git looks like this:

trees:  246M  (156812 files)  avg: 1647, disk:  699M
blobs: 1171M  (185458 files)  avg: 6623, disk: 1573M
Total: 1422M  (370473 files)  avg: 4025, disk: 2382M

The delta compressed approach outlined above yields:

trees:   47M   (73519 files)  avg:  672, disk:  289M
blobs:  156M   (49857 files)  avg: 3285, disk:  281M
deltas: 107M  (218894 files)  avg:  515, disk:  863M
Total:  315M  (370473 files)  avg:  892, disk: 1544M

So, 1.4G became 315M. Not too bad, IMHO. Disk size is still big,
of course, but disks are apparently cheap these days.

It could probably be even better, if git didn't produce quite as many
tree objects. Some sort of chunking together of tree objects would
help delta compression a lot (and improve disk size quite a bit in the
process).

Attached is a patch (against current cogito). It is basically the same
as yours, Nicolas, except for some hackery to make the above possible.
I'm sure I've made lots of stupid mistakes in it (and the 10% limit is
hardcoded right now; I'm lazy).

/dan

[-- Attachment #2: delta.patch --]
[-- Type: text/x-patch, Size: 26038 bytes --]

Index: Makefile
===================================================================
--- 4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2/Makefile  (mode:100644)
+++ uncommitted/Makefile  (mode:100644)
@@ -35,7 +35,7 @@
 INSTALL?=install
 
 SCRIPTS=git-apply-patch-script git-merge-one-file-script git-prune-script \
-	git-pull-script git-tag-script git-resolve-script
+	git-pull-script git-tag-script git-resolve-script git-deltafy-script
 
 PROG=   git-update-cache git-diff-files git-init-db git-write-tree \
 	git-read-tree git-commit-tree git-cat-file git-fsck-cache \
@@ -44,7 +44,7 @@
 	git-unpack-file git-export git-diff-cache git-convert-cache \
 	git-http-pull git-rpush git-rpull git-rev-list git-mktag \
 	git-diff-helper git-tar-tree git-local-pull git-write-blob \
-	git-get-tar-commit-id
+	git-get-tar-commit-id git-mkdelta
 
 SCRIPT=	commit-id tree-id parent-id cg-add cg-admin-lsobj cg-admin-uncommit \
 	cg-branch-add cg-branch-ls cg-cancel cg-clone cg-commit cg-diff \
@@ -60,7 +60,7 @@
 COMMON=	read-cache.o
 
 LIB_OBJS=read-cache.o sha1_file.o usage.o object.o commit.o tree.o blob.o \
-	 tag.o date.o
+	 tag.o date.o diff-delta.o patch-delta.o
 LIB_FILE=libgit.a
 LIB_H=cache.h object.h blob.h tree.h commit.h tag.h
 
@@ -94,6 +94,9 @@
 all: $(PROG) $(GEN_SCRIPT)
 
 
+test-delta: test-delta.c diff-delta.o patch-delta.o
+	$(CC) $(CFLAGS) -o $@ $^
+
 git-%: %.c $(LIB_FILE)
 	$(CC) $(CFLAGS) -o $@ $(filter %.c,$^) $(LIBS)
 
Index: delta.h
===================================================================
--- /dev/null  (tree:4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2)
+++ uncommitted/delta.h  (mode:100644)
@@ -0,0 +1,6 @@
+extern void *diff_delta(void *from_buf, unsigned long from_size,
+			void *to_buf, unsigned long to_size,
+		        unsigned long *delta_size);
+extern void *patch_delta(void *src_buf, unsigned long src_size,
+			 void *delta_buf, unsigned long delta_size,
+			 unsigned long *dst_size);
Index: diff-delta.c
===================================================================
--- /dev/null  (tree:4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2)
+++ uncommitted/diff-delta.c  (mode:100644)
@@ -0,0 +1,330 @@
+/*
+ * diff-delta.c: generate a delta between two buffers
+ *
+ *  Many parts of this file have been lifted from LibXDiff version 0.10.
+ *  http://www.xmailserver.org/xdiff-lib.html
+ *
+ *  LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
+ *  Copyright (C) 2003	Davide Libenzi
+ *
+ *  Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
+ *
+ *  This file is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+#include "delta.h"
+
+
+/* block size: min = 16, max = 64k, power of 2 */
+#define BLK_SIZE 16
+
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+#define GR_PRIME 0x9e370001
+#define HASH(v, b) (((unsigned int)(v) * GR_PRIME) >> (32 - (b)))
+	
+/* largest prime smaller than 65536 */
+#define BASE 65521
+
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
+#define NMAX 5552
+
+#define DO1(buf, i)  { s1 += buf[i]; s2 += s1; }
+#define DO2(buf, i)  DO1(buf, i); DO1(buf, i + 1);
+#define DO4(buf, i)  DO2(buf, i); DO2(buf, i + 2);
+#define DO8(buf, i)  DO4(buf, i); DO4(buf, i + 4);
+#define DO16(buf)    DO8(buf, 0); DO8(buf, 8);
+
+static unsigned int adler32(unsigned int adler, const unsigned char *buf, int len)
+{
+	int k;
+	unsigned int s1 = adler & 0xffff;
+	unsigned int s2 = adler >> 16;
+
+	while (len > 0) {
+		k = MIN(len, NMAX);
+		len -= k;
+		while (k >= 16) {
+			DO16(buf);
+			buf += 16;
+			k -= 16;
+		}
+		if (k != 0)
+			do {
+				s1 += *buf++;
+				s2 += s1;
+			} while (--k);
+		s1 %= BASE;
+		s2 %= BASE;
+	}
+
+	return (s2 << 16) | s1;
+}
+
+static unsigned int hashbits(unsigned int size)
+{
+	unsigned int val = 1, bits = 0;
+	while (val < size && bits < 32) {
+		val <<= 1;
+	       	bits++;
+	}
+	return bits ? bits: 1;
+}
+
+typedef struct s_chanode {
+	struct s_chanode *next;
+	int icurr;
+} chanode_t;
+
+typedef struct s_chastore {
+	chanode_t *head, *tail;
+	int isize, nsize;
+	chanode_t *ancur;
+	chanode_t *sncur;
+	int scurr;
+} chastore_t;
+
+static void cha_init(chastore_t *cha, int isize, int icount)
+{
+	cha->head = cha->tail = NULL;
+	cha->isize = isize;
+	cha->nsize = icount * isize;
+	cha->ancur = cha->sncur = NULL;
+	cha->scurr = 0;
+}
+
+static void *cha_alloc(chastore_t *cha)
+{
+	chanode_t *ancur;
+	void *data;
+
+	ancur = cha->ancur;
+	if (!ancur || ancur->icurr == cha->nsize) {
+		ancur = malloc(sizeof(chanode_t) + cha->nsize);
+		if (!ancur)
+			return NULL;
+		ancur->icurr = 0;
+		ancur->next = NULL;
+		if (cha->tail)
+			cha->tail->next = ancur;
+		if (!cha->head)
+			cha->head = ancur;
+		cha->tail = ancur;
+		cha->ancur = ancur;
+	}
+
+	data = (void *)ancur + sizeof(chanode_t) + ancur->icurr;
+	ancur->icurr += cha->isize;
+	return data;
+}
+
+static void cha_free(chastore_t *cha)
+{
+	chanode_t *cur = cha->head;
+	while (cur) {
+		chanode_t *tmp = cur;
+		cur = cur->next;
+		free(tmp);
+	}
+}
+
+typedef struct s_bdrecord {
+	struct s_bdrecord *next;
+	unsigned int fp;
+	const unsigned char *ptr;
+} bdrecord_t;
+
+typedef struct s_bdfile {
+	const unsigned char *data, *top;
+	chastore_t cha;
+	unsigned int fphbits;
+	bdrecord_t **fphash;
+} bdfile_t;
+
+static int delta_prepare(const unsigned char *buf, int bufsize, bdfile_t *bdf)
+{
+	unsigned int fphbits;
+	int i, hsize;
+	const unsigned char *base, *data, *top;
+	bdrecord_t *brec;
+	bdrecord_t **fphash;
+
+	fphbits = hashbits(bufsize / BLK_SIZE + 1);
+	hsize = 1 << fphbits;
+	fphash = malloc(hsize * sizeof(bdrecord_t *));
+	if (!fphash)
+		return -1;
+	for (i = 0; i < hsize; i++)
+		fphash[i] = NULL;
+	cha_init(&bdf->cha, sizeof(bdrecord_t), hsize / 4 + 1);
+
+	bdf->data = data = base = buf;
+	bdf->top = top = buf + bufsize;
+	data += (bufsize / BLK_SIZE) * BLK_SIZE;
+	if (data == top)
+		data -= BLK_SIZE;
+
+	for ( ; data >= base; data -= BLK_SIZE) {
+		brec = cha_alloc(&bdf->cha);
+		if (!brec) {
+			cha_free(&bdf->cha);
+			free(fphash);
+			return -1;
+		}
+		brec->fp = adler32(0, data, MIN(BLK_SIZE, top - data));
+		brec->ptr = data;
+		i = HASH(brec->fp, fphbits);
+		brec->next = fphash[i];
+		fphash[i] = brec;
+	}
+
+	bdf->fphbits = fphbits;
+	bdf->fphash = fphash;
+
+	return 0;
+}
+
+static void delta_cleanup(bdfile_t *bdf)
+{
+	free(bdf->fphash);
+	cha_free(&bdf->cha);
+}
+
+#define COPYOP_SIZE(o, s) \
+    (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \
+     !!(s & 0xff) + !!(s & 0xff00) + 1)
+
+void *diff_delta(void *from_buf, unsigned long from_size,
+		 void *to_buf, unsigned long to_size,
+		 unsigned long *delta_size)
+{
+	int i, outpos, outsize, inscnt, csize, msize, moff;
+	unsigned int fp;
+	const unsigned char *data, *top, *ptr1, *ptr2;
+	unsigned char *out, *orig;
+	bdrecord_t *brec;
+	bdfile_t bdf;
+
+	if (!from_size || !to_size || delta_prepare(from_buf, from_size, &bdf))
+		return NULL;
+	
+	outpos = 0;
+	outsize = 8192;
+	out = malloc(outsize);
+	if (!out) {
+		delta_cleanup(&bdf);
+		return NULL;
+	}
+
+	data = to_buf;
+	top = to_buf + to_size;
+
+	/* store reference buffer size */
+	orig = out + outpos++;
+	*orig = i = 0;
+	do {
+		if (from_size & 0xff) {
+			*orig |= (1 << i);
+			out[outpos++] = from_size;
+		}
+		i++;
+		from_size >>= 8;
+	} while (from_size);
+
+	/* store target buffer size */
+	orig = out + outpos++;
+	*orig = i = 0;
+	do {
+		if (to_size & 0xff) {
+			*orig |= (1 << i);
+			out[outpos++] = to_size;
+		}
+		i++;
+		to_size >>= 8;
+	} while (to_size);
+
+	inscnt = 0;
+	moff = 0;
+	while (data < top) {
+		msize = 0;
+		fp = adler32(0, data, MIN(top - data, BLK_SIZE));
+		i = HASH(fp, bdf.fphbits);
+		for (brec = bdf.fphash[i]; brec; brec = brec->next) {
+			if (brec->fp == fp) {
+				csize = bdf.top - brec->ptr;
+				if (csize > top - data)
+					csize = top - data;
+				for (ptr1 = brec->ptr, ptr2 = data; 
+				     csize && *ptr1 == *ptr2;
+				     csize--, ptr1++, ptr2++);
+
+				csize = ptr1 - brec->ptr;
+				if (csize > msize) {
+					moff = brec->ptr - bdf.data;
+					msize = csize;
+					if (msize >= 0x10000) {
+						msize = 0x10000;
+						break;
+					}
+				}
+			}
+		}
+
+		if (!msize || msize < COPYOP_SIZE(moff, msize)) {
+			if (!inscnt)
+				outpos++;
+			out[outpos++] = *data++;
+			inscnt++;
+			if (inscnt == 0x7f) {
+				out[outpos - inscnt - 1] = inscnt;
+				inscnt = 0;
+			}
+		} else {
+			if (inscnt) {
+				out[outpos - inscnt - 1] = inscnt;
+				inscnt = 0;
+			}
+
+			data += msize;
+			orig = out + outpos++;
+			i = 0x80;
+
+			if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
+			moff >>= 8;
+			if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
+			moff >>= 8;
+			if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
+			moff >>= 8;
+			if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
+
+			if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
+			msize >>= 8;
+			if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
+
+			*orig = i;
+		}
+
+		/* next time around the largest possible output is 1 + 4 + 3 */
+		if (outpos > outsize - 8) {
+			void *tmp = out;
+			outsize = outsize * 3 / 2;
+			out = realloc(out, outsize);
+			if (!out) {
+				free(tmp);
+				delta_cleanup(&bdf);
+				return NULL;
+			}
+		}
+	}
+
+	if (inscnt)
+		out[outpos - inscnt - 1] = inscnt;
+
+	delta_cleanup(&bdf);
+	*delta_size = outpos;
+	return out;
+}
Index: git-deltafy-script
===================================================================
--- /dev/null  (tree:4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2)
+++ uncommitted/git-deltafy-script  (mode:100755)
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# Script to deltafy an entire GIT repository based on the commit list.
+
+# git-deltafy-script --pack deltafies a repository, from current HEAD
+# down.
+# git-deltafy-script --unpack undeltafies all objects.
+
+export LANG=C
+
+depth=
+[ "$1" == "-d" ] && depth="--max-depth=$2" && shift 2
+
+prevcommit=
+prevtree=
+
+treeid() {
+	git-cat-file commit "$1" | sed -e 's/tree //;q'
+}
+
+mkdelta() {
+	git-mkdelta ${depth:---max-depth=1} -o -v "$@" || exit 1	# honour "-d N" when given, else keep depth 1
+}
+
+if [ "$1" = --pack ] ; then
+git-rev-list HEAD | tac |
+while read commit; do
+	if [ "$prevcommit" ]; then
+		git-diff-tree -r -z $prevcommit $commit |
+		while IFS=$'\t' read -d $'\0' a1 a2 sha file; do
+			to=${sha#*->}
+			from=${sha%->*}
+			[ "$from" != "$to" ] && mkdelta $from $to 
+		done
+
+		prevdir=
+		prevsha=
+		tree=$(treeid "$commit") 
+		[ "$prevtree" ] || prevtree=$(treeid $prevcommit)
+		[ $prevtree != $tree ] && mkdelta $prevtree $tree 
+
+		( git-ls-tree -r $prevtree; git-ls-tree -r $tree ) |
+		grep $'^[0-9]*\ttree' | sort -k4 -s | uniq -u -s4 |
+		while IFS=$'\t' read a1 a2 sha dir; do
+			if [ "$prevdir" = "$dir" -a "$prevsha" != "$sha" ]; then
+				echo "deltafying tree $dir"
+				mkdelta $prevsha $sha 
+			fi
+			prevdir=$dir
+			prevsha=$sha
+		done
+
+	fi
+	prevcommit=$commit
+	prevtree=$tree
+done
+elif [ "$1" = --unpack ]; then
+	( cd .git/objects && find -type f ) | sed 's,[./],,g' | 
+	xargs git-mkdelta --max-depth=0 -v
+else
+	echo "usage: $(basename "$0") [--pack|--unpack]" >&2
+	exit 1
+fi
+exit 0
Index: mkdelta.c
===================================================================
--- /dev/null  (tree:4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2)
+++ uncommitted/mkdelta.c  (mode:100644)
@@ -0,0 +1,306 @@
+/*
+ * Deltafication of a GIT database.
+ *
+ * (C) 2005 Nicolas Pitre <nico@cam.org>
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include "cache.h"
+#include "delta.h"
+
+static int replace_object(char *buf, unsigned long len, unsigned char *sha1,
+			  char *hdr, int hdrlen)
+{
+	char tmpfile[PATH_MAX];
+	int size;
+	char *compressed;
+	z_stream stream;
+	int fd;
+
+	snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory());
+	fd = mkstemp(tmpfile);
+	if (fd < 0)
+		return error("%s: %s\n", tmpfile, strerror(errno));
+	
+	/* Set it up */
+	memset(&stream, 0, sizeof(stream));
+	deflateInit(&stream, Z_BEST_COMPRESSION);
+	size = deflateBound(&stream, len+hdrlen);
+	compressed = xmalloc(size);
+
+	/* Compress it */
+	stream.next_out = compressed;
+	stream.avail_out = size;
+
+	/* First header.. */
+	stream.next_in = hdr;
+	stream.avail_in = hdrlen;
+	while (deflate(&stream, 0) == Z_OK)
+		/* nothing */;
+
+	/* Then the data itself.. */
+	stream.next_in = buf;
+	stream.avail_in = len;
+	while (deflate(&stream, Z_FINISH) == Z_OK)
+		/* nothing */;
+	deflateEnd(&stream);
+	size = stream.total_out;
+
+	if (write(fd, compressed, size) != size) {
+		perror("unable to write file");
+		close(fd);
+		unlink(tmpfile);
+		return -1;
+	}
+	fchmod(fd, 0444);
+	close(fd);
+
+	if (rename(tmpfile, sha1_file_name(sha1))) {
+		perror("unable to replace original object");
+		unlink(tmpfile);
+		return -1;
+	}
+	return 0;
+}
+
+static int write_delta_file(char *buf, unsigned long len,
+			    unsigned char *sha1_ref, unsigned char *sha1_trg)
+{
+	char hdr[50];
+	int hdrlen;
+
+	/* Generate the header + sha1 of reference for delta */
+	hdrlen = sprintf(hdr, "delta %lu", len+20)+1;
+	memcpy(hdr + hdrlen, sha1_ref, 20);
+	hdrlen += 20;
+
+	return replace_object(buf, len, sha1_trg, hdr, hdrlen);
+}
+
+static int replace_sha1_file(char *buf, unsigned long len,
+			     char *type, unsigned char *sha1)
+{
+	char hdr[50];
+	int hdrlen;
+
+	hdrlen = sprintf(hdr, "%s %lu", type, len)+1;
+	return replace_object(buf, len, sha1, hdr, hdrlen);
+}
+
+static void *get_buffer(unsigned char *sha1, char *type, unsigned long *size)
+{
+	unsigned long mapsize;
+	void *map = map_sha1_file(sha1, &mapsize);
+	if (map) {
+		void *buffer = unpack_sha1_file(map, mapsize, type, size);
+		munmap(map, mapsize);
+		if (buffer)
+			return buffer;
+	}
+	error("unable to get object %s", sha1_to_hex(sha1));
+	return NULL;
+}
+
+static void *expand_delta(void *delta, unsigned long delta_size, char *type,
+			  unsigned long *size, unsigned int *depth, char *head)
+{
+	void *buf = NULL;	/* rebuilt object; stays NULL on any failure */
+	(*depth)++;	/* bump the caller's link counter, not the local pointer */
+	if (delta_size < 20) {	/* too short to hold the 20-byte reference sha1 */
+		error("delta object is bad");
+		free(delta);
+	} else {
+		unsigned long ref_size;
+		void *ref = get_buffer(delta, type, &ref_size);	/* delta starts with the reference sha1 */
+		if (ref && !strcmp(type, "delta"))
+			ref = expand_delta(ref, ref_size, type, &ref_size,
+					   depth, head);	/* reference is itself a delta: recurse */
+		else
+			memcpy(head, delta, 20);	/* not a (readable) delta: its sha1 is the chain head */
+		if (ref)
+			buf = patch_delta(ref, ref_size, delta+20,
+					  delta_size-20, size);
+		free(ref);
+		free(delta);	/* 'delta' is consumed on every path */
+	}
+	return buf;
+}
+
+static char *mkdelta_usage =
+"mkdelta [ --max-depth=N ] [ -o ] <reference_sha1> <target_sha1> [ <next_sha1> ... ]";
+
+int main(int argc, char **argv)
+{
+	unsigned char sha1_ref[20], sha1_trg[20], head_ref[20], head_trg[20];
+	char type_ref[20], type_trg[20];
+	void *buf_ref, *buf_trg, *buf_delta;
+	unsigned long size_ref, size_trg, size_orig, size_delta;
+	unsigned int depth_ref, depth_trg, depth_max = -1;
+	int i, verbose = 0, oneparent = 0;
+
+	for (i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "-v")) {
+			verbose = 1;
+		} else if (!strcmp(argv[i], "-o")) {
+			oneparent = 1;
+		} else if (!strcmp(argv[i], "-d") && i+1 < argc) {
+			depth_max = atoi(argv[++i]);
+		} else if (!strncmp(argv[i], "--max-depth=", 12)) {
+			depth_max = atoi(argv[i]+12);
+		} else
+			break;
+	}
+
+	if (i + (depth_max != 0) >= argc)
+		usage(mkdelta_usage);
+
+	if (get_sha1(argv[i], sha1_ref))
+		die("bad sha1 %s", argv[i]);
+	depth_ref = 0;
+	buf_ref = get_buffer(sha1_ref, type_ref, &size_ref);
+	if (oneparent && depth_max > 0) {
+		while (buf_ref && !strcmp(type_ref, "delta")) {
+			if (size_ref < 20)
+				die("bad delta object");
+			//printf ("getting parent for %s %i\n", 
+					//sha1_to_hex(sha1_ref), depth_max);
+			memcpy(sha1_ref, buf_ref, 20);
+			free(buf_ref);
+			//printf("loading parent %s\n",
+					//sha1_to_hex(sha1_ref));
+			buf_ref = get_buffer(sha1_ref, 
+					type_ref, &size_ref);
+			if (!buf_ref) die("broken get_buffer!");
+		}
+	}
+	if (buf_ref && !strcmp(type_ref, "delta"))
+		buf_ref = expand_delta(buf_ref, size_ref, type_ref,
+				       &size_ref, &depth_ref, head_ref);
+	else
+		memcpy(head_ref, sha1_ref, 20);
+	if (!buf_ref)
+		die("unable to obtain initial object %s", argv[i]);
+
+	if (depth_ref > depth_max) {
+		if (replace_sha1_file(buf_ref, size_ref, type_ref, sha1_ref))
+			die("unable to restore %s", argv[i]);
+		if (verbose)
+			printf("undelta %s (depth was %d)\n", argv[i], depth_ref);
+		depth_ref = 0;
+	}
+
+	while (++i < argc) {
+		if (get_sha1(argv[i], sha1_trg))
+			die("bad sha1 %s", argv[i]);
+		depth_trg = 0;
+		buf_trg = get_buffer(sha1_trg, type_trg, &size_trg);
+		if (buf_trg && !size_trg) {
+			if (verbose)
+				printf("skip    %s (object is empty)\n", argv[i]);
+			continue;
+		}
+		size_orig = size_trg;
+		if (buf_trg && !strcmp(type_trg, "delta")) {
+			if (!memcmp(buf_trg, sha1_ref, 20)) {
+				/* delta already in place */
+				depth_ref++;
+				memcpy(sha1_ref, sha1_trg, 20);
+				buf_ref = patch_delta(buf_ref, size_ref,
+						      buf_trg+20, size_trg-20,
+						      &size_ref);
+				if (!buf_ref)
+					die("unable to apply delta %s", argv[i]);
+				if (depth_ref > depth_max) {
+					if (replace_sha1_file(buf_ref, size_ref,
+							      type_ref, sha1_ref))
+						die("unable to restore %s", argv[i]);
+					if (verbose)
+						printf("undelta %s (depth was %d)\n", argv[i], depth_ref);
+					depth_ref = 0;
+					continue;
+				}
+				if (verbose)
+					printf("skip    %s (delta already in place)\n", argv[i]);
+				continue;
+			}
+			buf_trg = expand_delta(buf_trg, size_trg, type_trg,
+					       &size_trg, &depth_trg, head_trg);
+		} else
+			memcpy(head_trg, sha1_trg, 20);
+		if (!buf_trg)
+			die("unable to read target object %s", argv[i]);
+
+		if (depth_trg > depth_max || depth_max == 0) {
+			if (replace_sha1_file(buf_trg, size_trg, type_trg, sha1_trg))
+				die("unable to restore %s", argv[i]);
+			if (verbose)
+				printf("undelta %s (depth was %d)\n", argv[i], depth_trg);
+			depth_trg = 0;
+			size_orig = size_trg;
+		}
+
+		if (depth_max == 0)
+			goto skip;
+
+		if (strcmp(type_ref, type_trg))
+			die("type mismatch for object %s", argv[i]);
+
+		if (!size_ref) {
+			if (verbose)
+				printf("skip    %s (initial object is empty)\n", argv[i]);
+			goto skip;
+		}
+		
+		depth_ref++;
+		if (depth_ref > depth_max) {
+			if (verbose)
+				printf("skip    %s (exceeding max link depth)\n", argv[i]);
+			goto skip;
+		}
+
+		if (!memcmp(head_ref, sha1_trg, 20)) {
+			if (verbose)
+				printf("skip    %s (would create a loop)\n", argv[i]);
+			goto skip;
+		}
+
+		buf_delta = diff_delta(buf_ref, size_ref, buf_trg, size_trg, &size_delta);
+		if (!buf_delta)
+			die("out of memory");
+
+		//if (size_delta+20 < size_orig) {
+		if ((size_delta+20)*100/size_trg < 10) {
+			if (write_delta_file(buf_delta, size_delta,
+					     sha1_ref, sha1_trg))
+				die("unable to write delta for %s", argv[i]);
+			free(buf_delta);
+			if (verbose)
+				printf("delta   %s (size=%ld.%02ld%%, depth=%d)\n",
+				       argv[i], (size_delta+20)*100 / size_trg,
+				       ((size_delta+20)*10000 / size_trg)%100,
+				       depth_ref);
+		} else {
+			free(buf_delta);
+			if (verbose) {
+				printf("skip    %s (original is smaller)", argv[i]);
+				printf(" (size=%ld.%02ld%%, depth=%d)\n",
+				       (size_delta+20)*100 / size_trg,
+				       ((size_delta+20)*10000 / size_trg)%100,
+				       depth_ref);
+			}
+			skip:
+			depth_ref = depth_trg;
+			memcpy(head_ref, head_trg, 20);
+		}
+
+		free(buf_ref);
+		buf_ref = buf_trg;
+		size_ref = size_trg;
+		memcpy(sha1_ref, sha1_trg, 20);
+	}
+
+	return 0;
+}
Index: mktag.c
===================================================================
--- 4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2/mktag.c  (mode:100644)
+++ uncommitted/mktag.c  (mode:100644)
@@ -25,20 +25,14 @@
 static int verify_object(unsigned char *sha1, const char *expected_type)
 {
 	int ret = -1;
-	unsigned long mapsize;
-	void *map = map_sha1_file(sha1, &mapsize);
+	char type[100];
+	unsigned long size;
+	void *buffer = read_sha1_file(sha1, type, &size);
 
-	if (map) {
-		char type[100];
-		unsigned long size;
-		void *buffer = unpack_sha1_file(map, mapsize, type, &size);
-
-		if (buffer) {
-			if (!strcmp(type, expected_type))
-				ret = check_sha1_signature(sha1, buffer, size, type);
-			free(buffer);
-		}
-		munmap(map, mapsize);
+	if (buffer) {
+		if (!strcmp(type, expected_type))
+			ret = check_sha1_signature(sha1, buffer, size, type);
+		free(buffer);
 	}
 	return ret;
 }
Index: patch-delta.c
===================================================================
--- /dev/null  (tree:4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2)
+++ uncommitted/patch-delta.c  (mode:100644)
@@ -0,0 +1,88 @@
+/*
+ * patch-delta.c:
+ * recreate a buffer from a source and the delta produced by diff-delta.c
+ *
+ * (C) 2005 Nicolas Pitre <nico@cam.org>
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "delta.h"
+
+void *patch_delta(void *src_buf, unsigned long src_size,
+		  void *delta_buf, unsigned long delta_size,
+		  unsigned long *dst_size)
+{
+	const unsigned char *data, *top;
+	unsigned char *dst_buf, *out, cmd;
+	unsigned long size;
+	int i;
+
+	/* rebuild dst from src + delta, NULL on error; a delta is at least 6 bytes */
+	if (delta_size < 6)
+		return NULL;
+
+	data = delta_buf;
+	top = data + delta_size;	/* typed pointer: no arithmetic on void * */
+
+	/* make sure the orig file size matches what we expect */
+	size = i = 0;
+	cmd = *data++;	/* bit n set => byte n of the size follows */
+	while (cmd) {
+		if (cmd & 1)
+			size |= (unsigned long)*data++ << i;	/* widen before shifting */
+		i += 8;
+		cmd >>= 1;
+	}
+	if (size != src_size)
+		return NULL;
+
+	/* now the result size */
+	size = i = 0;
+	cmd = *data++;
+	while (cmd) {
+		if (cmd & 1)
+			size |= (unsigned long)*data++ << i;	/* widen before shifting */
+		i += 8;
+		cmd >>= 1;
+	}
+	dst_buf = malloc(size);
+	if (!dst_buf)
+		return NULL;
+
+	out = dst_buf;
+	while (data < top) {
+		cmd = *data++;
+		if (cmd & 0x80) {	/* copy op: low bits say which offset/size bytes follow */
+			unsigned long cp_off = 0, cp_size = 0;
+			const unsigned char *buf;
+			if (cmd & 0x01) cp_off = *data++;
+			if (cmd & 0x02) cp_off |= ((unsigned long)*data++ << 8);
+			if (cmd & 0x04) cp_off |= ((unsigned long)*data++ << 16);
+			if (cmd & 0x08) cp_off |= ((unsigned long)*data++ << 24);	/* int << 24 would sign-extend */
+			if (cmd & 0x10) cp_size = *data++;
+			if (cmd & 0x20) cp_size |= ((unsigned long)*data++ << 8);
+			if (cp_size == 0) cp_size = 0x10000;	/* size 0 encodes a full 64k copy */
+			buf = (cmd & 0x40) ? dst_buf : src_buf;
+			memcpy(out, buf + cp_off, cp_size);	/* NOTE(review): cp_off/cp_size are not bounds-checked */
+			out += cp_size;
+		} else {	/* insert op: cmd literal bytes follow */
+			memcpy(out, data, cmd);	/* NOTE(review): not checked against top or dst size */
+			out += cmd;
+			data += cmd;
+		}
+	}
+
+	/* sanity check: whole delta consumed and exactly 'size' bytes produced */
+	if (data != top || out - dst_buf != size) {
+		free(dst_buf);
+		return NULL;
+	}
+
+	*dst_size = size;
+	return dst_buf;
+}
Index: sha1_file.c
===================================================================
--- 4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2/sha1_file.c  (mode:100644)
+++ uncommitted/sha1_file.c  (mode:100644)
@@ -9,6 +9,7 @@
 #include <stdarg.h>
 #include <limits.h>
 #include "cache.h"
+#include "delta.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -353,6 +354,19 @@
 	if (map) {
 		buf = unpack_sha1_file(map, mapsize, type, size);
 		munmap(map, mapsize);
+		if (buf && !strcmp(type, "delta")) {
+			void *ref = NULL, *delta = buf;
+			unsigned long ref_size, delta_size = *size;
+			buf = NULL;
+			if (delta_size > 20)
+				ref = read_sha1_file(delta, type, &ref_size);
+			if (ref)
+				buf = patch_delta(ref, ref_size,
+						  delta+20, delta_size-20, 
+						  size);
+			free(delta);
+			free(ref);
+		}
 		return buf;
 	}
 	return NULL;
Index: test-delta.c
===================================================================
--- /dev/null  (tree:4ef3de6ae44888d83e8c00326ddcc9f40cbd12e2)
+++ uncommitted/test-delta.c  (mode:100644)
@@ -0,0 +1,79 @@
+/*
+ * test-delta.c: test code to exercise diff-delta.c and patch-delta.c
+ *
+ * (C) 2005 Nicolas Pitre <nico@cam.org>
+ *
+ * This code is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include "delta.h"
+
+static const char *usage =
+	"test-delta (-d|-p) <from_file> <data_file> <out_file>";
+
+int main(int argc, char *argv[])
+{
+	int fd;
+	struct stat st;
+	void *from_buf, *data_buf, *out_buf;
+	unsigned long from_size, data_size, out_size;
+
+	if (argc != 5 || (strcmp(argv[1], "-d") && strcmp(argv[1], "-p"))) {
+		fprintf(stderr, "Usage: %s\n", usage);
+		return 1;
+	}
+
+	fd = open(argv[2], O_RDONLY);
+	if (fd < 0 || fstat(fd, &st)) {
+		perror(argv[2]);
+		return 1;
+	}
+	from_size = st.st_size;
+	from_buf = mmap(NULL, from_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (from_buf == MAP_FAILED) {
+		perror(argv[2]);
+		return 1;
+	}
+	close(fd);
+
+	fd = open(argv[3], O_RDONLY);
+	if (fd < 0 || fstat(fd, &st)) {
+		perror(argv[3]);
+		return 1;
+	}
+	data_size = st.st_size;
+	data_buf = mmap(NULL, data_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (data_buf == MAP_FAILED) {
+		perror(argv[3]);
+		return 1;
+	}
+	close(fd);
+
+	if (argv[1][1] == 'd')
+		out_buf = diff_delta(from_buf, from_size,
+				     data_buf, data_size, &out_size);
+	else
+		out_buf = patch_delta(from_buf, from_size,
+				      data_buf, data_size, &out_size);
+	if (!out_buf) {
+		fprintf(stderr, "delta operation failed (returned NULL)\n");
+		return 1;
+	}
+
+	fd = open (argv[4], O_WRONLY|O_CREAT|O_TRUNC, 0666);
+	if (fd < 0 || write(fd, out_buf, out_size) != out_size) {
+		perror(argv[4]);
+		return 1;
+	}
+
+	return 0;
+}

^ permalink raw reply

* Re: [PATCH 2/4] Tweak diff output further to make it a bit less distracting.
From: Junio C Hamano @ 2005-05-17 22:25 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Petr Baudis, git
In-Reply-To: <Pine.LNX.4.58.0505171227260.18337@ppc970.osdl.org>

>>>>> "LT" == Linus Torvalds <torvalds@osdl.org> writes:

LT> So I'd really suggest having just a flag that says "pure old diff format"  
LT> or "new diff format with renames", and if the latter is selected, then do
LT> _just_ the changes, ie the rename+change case would really boil down to
LT> getting just

That is sensible.  So with the --detect-rename flag, we do
rename detection and show only the changes, otherwise we do not
do rename detection and give pure old diff (two diffs against
/dev/null, that is).  I do not personally think --detect-rename
with --output-old-style-diff is useful.

Now, in the new diff format, if the rename is really a pure
rename, then we would have:

     diff -git a/nitfol b/nitfol
     rename old frotz
     rename new nitfol
     diff -git a/rezrov b/rezrov
     --- a/rezrov
     +++ b/rezrov
     @@ ...

that is, nothing until the patch for the next file or EOF.  Is
this acceptable?

^ permalink raw reply

* Re: [PATCH 0/4] Pulling refs files
From: Daniel Barkalow @ 2005-05-17 22:20 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git, Linus Torvalds
In-Reply-To: <20050517214533.GP7136@pasky.ji.cz>

On Tue, 17 May 2005, Petr Baudis wrote:

> Dear diary, on Tue, May 17, 2005 at 11:20:54PM CEST, I got a letter
> where Daniel Barkalow <barkalow@iabervon.org> told me that...
> > Hmm... maybe the right thing is to make the implementation-provided
> > transfer code handle arbitrary things in GIT_DIR, but have code for
> > updating reference files atomically and using a reference file to start
> > from use "refs/"? Certainly, there's nothing special about reference files
> > in transit.
> > 
> > Certainly the things in the info/ directory shouldn't be treated a head
> > that you're going to pull, so that has to be different above the protocol
> > level anyway.
> 
> *confused* :) I'm sorry, I have trouble understanding this. Could you
> rephrase, please?

If you want to get info/ignore, you want to get it and save it, not
download a set of objects it refers to. So it's different from specifying
that you want to use refs/heads/master as the starting point for a pull.

There would be a separation between transferring whatever file you specify
and treating the specified (remote) file from refs/ as the starting point
for pulling objects.

Also, you don't need to do the same kind of careful update, since the
desired value of info/ignore isn't going to depend on the previous
value.

> > So the remote receiver should get an instruction: change X from OLD to NEW
> > and pull NEW. It should:
> > 
> >  - lock the file against further updates
> >  - check that the current value is the provided OLD
> >  - pull the necessary objects
> >  - write NEW to the file
> - unlock the file ;-))

The way I'm actually doing things is to write NEW into the lock file at
some arbitrary point, and "writing to the file" is actually renaming the
lock file to the normal filename. So writing unlocks the file
automatically.

> >  - report success
> > 
> > On failure of any step, it should unlock the file without changing it.
> 
> Sounds right.

I think I'll get to implementing it Wednesday night. I might be able to
get the first step done tonight (my previous patch, except with the
transfer applying to arbitrary files).

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* Re: [PATCH 1/2] Introduce git-run-with-user-path helper program.
From: Junio C Hamano @ 2005-05-17 22:13 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git, torvalds
In-Reply-To: <20050517213752.GO7136@pasky.ji.cz>

>>>>> "PB" == Petr Baudis <pasky@ucw.cz> writes:

PB> But that won't work good enough for me. E.g. when committing in a
PB> subdirectory, I want to commit only changes made in the subdirectory,
PB> etc.

Assuming that you have something that lets you commit selected
files when you are at the top level (say cg-commit), and further
assuming that today it only works from the toplevel, that is:

    $ pwd
    /usr/src/linux
    $ cg-commit fs/ext?/Makefile

works today, what I am saying is:

    $ pwd
    /usr/src/linux/fs
    $ git-run-with-user-path cg-commit -- ext?/Makefile

would work.

Usually the command like cg-commit would take non-path
parameters, so if this works today:

    $ pwd
    /usr/src/linux
    $ cg-commit -m 'Changed Makefile' fs/ext?/Makefile

then:

    $ pwd
    /usr/src/linux/fs
    $ git-run-with-user-path cg-commit -m 'Changed Makefile' -- ext?/Makefile

would work.

Once you have a core that works well but only at the top
directory level, then you can make a thin wrapper using
git-run-with-user-path to make that work equally well with the
filesystem path from subdirectories.  And the core-ish thing
that only works at the top directory level does not need to
worry about finding .git/ anymore, which is the whole point of
what this helper is giving you.

BTW, I am wondering if your choice of cg-commit as an example
(as opposed to something else like diff or add) is a flamebait
or just an innocent random example ;-)?

^ permalink raw reply

* Re: [PATCH 0/4] Pulling refs files
From: Petr Baudis @ 2005-05-17 21:45 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git, Linus Torvalds
In-Reply-To: <Pine.LNX.4.21.0505171645130.30848-100000@iabervon.org>

Dear diary, on Tue, May 17, 2005 at 11:20:54PM CEST, I got a letter
where Daniel Barkalow <barkalow@iabervon.org> told me that...
> On Tue, 17 May 2005, Petr Baudis wrote:
> > Anything that gets eventually wound up in the info/ directory. (The name
> > of the ignore file saved in info/ignore is the current hit.)
> 
> Hmm... maybe the right thing is to make the implementation-provided
> transfer code handle arbitrary things in GIT_DIR, but have code for
> updating reference files atomically and using a reference file to start
> from use "refs/"? Certainly, there's nothing special about reference files
> in transit.
> 
> Certainly the things in the info/ directory shouldn't be treated a head
> that you're going to pull, so that has to be different above the protocol
> level anyway.

*confused* :) I'm sorry, I have trouble understanding this. Could you
rephrase, please?

> > Well, it'd be again nice to have some generic mechanism for this so that
> > the user could theoretically push over rsync too or something (although
> > that'll be even more racy, it is fine for single-user repository).
> 
> Hmm; I'm not sure what would be good for interfacing with rsync.

I've been thinking about writing some FTP-like client for rsync, where
you could "interactively" tell it what files to download etc.

> > I think the remote file to write the value inside should be porcelain
> > business.
> 
> Certainly it's porcelain business what remote file to write; but I think
> it has to be core business doing the lock, test, and update. I think it
> would be inconvenient to go back to the porcelain layer in the middle of
> the operation, particularly since it would have to go back to the core,
> which is what has the connection to the remote host.

Of course. The porcelain file would just provide the filename.

> > What you should always check though is that before the pull
> > (and after the locking) the value in that file is the same as the "push
> > base". This way you make sure that you are still following a single
> > branch and in case of multiuser repositories that you were fully merged
> > before pushing.
> 
> So the remote receiver should get an instruction: change X from OLD to NEW
> and pull NEW. It should:
> 
>  - lock the file against further updates
>  - check that the current value is the provided OLD
>  - pull the necessary objects
>  - write NEW to the file
- unlock the file ;-))
>  - report success
> 
> On failure of any step, it should unlock the file without changing it.

Sounds right.

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply

* Re: [PATCH] uml: remove elf.h
From: Linus Torvalds @ 2005-05-17 21:44 UTC (permalink / raw)
  To: Petr Baudis
  Cc: Andrew Morton, git, linux-kernel,
	Paolo 'Blaisorblade' Giarrusso
In-Reply-To: <20050517213447.GN7136@pasky.ji.cz>



On Tue, 17 May 2005, Petr Baudis wrote:
>
> Perhaps some artificial timestamp could help to the file
> removal heuristic in GNU patch. Or passing it -E, but that will
> obviously do the wrong thing to any other zero-sized files.

-E is always correct for the kernel, since zero-length files aren't really 
supposed to exist anyway, and "make distclean" has always removed them.

		Linus

^ permalink raw reply

* Re: [PATCH] uml: remove elf.h
From: Linus Torvalds @ 2005-05-17 21:42 UTC (permalink / raw)
  To: Andrew Morton; +Cc: git, linux-kernel, Paolo 'Blaisorblade' Giarrusso
In-Reply-To: <20050517142113.59097a3d.akpm@osdl.org>



On Tue, 17 May 2005, Andrew Morton wrote:
> 
> And I bet that when Linus releases patch-2.6.12-rc5.gz and patch-2.6.12.gz,
> they will have the same construct.  AFAICT, the patch-based people will
> need to download a full new tarball to get rid of this dang file.

Or just run "make distclean" once.

> It all wouldn't really matter much, except apparently the mere presence of
> this file breaks the UML build.
> 
> Frazzle.  Paolo, I'm almost wondering if we should change that test to also
> check for a zero-length file.

How many people are affected? The file _is_ gone in the git archives, and 
in fact I wonder if it was ever there, but I didn't bother to check.

		Linus

^ permalink raw reply

* Re: [PATCH 1/2] Introduce git-run-with-user-path helper program.
From: Petr Baudis @ 2005-05-17 21:37 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, torvalds
In-Reply-To: <7v4qd1tuud.fsf@assigned-by-dhcp.cox.net>

Dear diary, on Tue, May 17, 2005 at 11:18:18PM CEST, I got a letter
where Junio C Hamano <junio@siamese.dyndns.org> told me that...
> If your plan is to make Cogito take filesystem paths, then you

Yes, that's my plan.

> can move bulk of the code currently in cg-blah, except the part
> that picks up non-path parameters, to cg-Xblah, and reduce
> cg-blah implementation down to just:
> 
>     ... parse options by shifting "$@" out.
>     ... then
>     git-run-with-user-path cg-Xblah $non-path-opts -- "$@"
> 
> and you can rip "the code to figure out .git" out from cg-Xblah.
> There is nothing to figure out at that point; it always is
> ${GIT_DIR-.git}/.

But that won't work well enough for me. E.g. when committing in a
subdirectory, I want to commit only changes made in the subdirectory,
etc.

Not even talking about much uglier implementation (that could be
remedied by calling myself recursively with some special argument).

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply

* Re: [PATCH] uml: remove elf.h
From: Petr Baudis @ 2005-05-17 21:34 UTC (permalink / raw)
  To: Andrew Morton
  Cc: git, linux-kernel, Linus Torvalds,
	Paolo 'Blaisorblade' Giarrusso
In-Reply-To: <20050517142113.59097a3d.akpm@osdl.org>

Dear diary, on Tue, May 17, 2005 at 11:21:13PM CEST, I got a letter
where Andrew Morton <akpm@osdl.org> told me that...
> Linux Kernel Mailing List <linux-kernel@vger.kernel.org> wrote:
> >
> > tree a3d85d9f43f64bbd8437c973caf98f79d95b5f3e
> > parent a123edab03ac39e08c2f9cb4fc1af07e099c68bc
> > author Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Tue, 17 May 2005 11:53:14 -0700
> > committer Linus Torvalds <torvalds@ppc970.osdl.org> Tue, 17 May 2005 21:59:11 -0700
> > 
> > [PATCH] uml: remove elf.h
> > 
> > Actually remove elf.h in the tree.  The previous patch, due to a quilt
> > bug/misuse, left it in the tree as a 0-length file, preventing the build to
> > see it as missing and to generate a symlink in its place.
> > 
> > Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
> > Signed-off-by: Andrew Morton <akpm@osdl.org>
> > Signed-off-by: Linus Torvalds <torvalds@osdl.org>
> > 
> >  asm-um/elf.h |    0 
> >  1 files changed
> > 
> > Index: include/asm-um/elf.h
> 
> Hot damn, this zero-length file is hard to get rid of.  I pulled Linus's
> tree this morning with this bizarre concoction:
> 
> 	cd $GIT_TREE
> 	cg-pull origin
> 	tagsha1=$(cat .git/refs/tags/v$(kversion))
> 	t=$(cat-file tag $tagsha1 | head -n 1 | sed -e 's/object //')
> 	cg-diff -r $t -r $(cat .git/refs/heads/origin) > $PULL/linus.patch
> 
> and the resulting diff has:
> 
> Index: include/asm-ia64/ioctl32.h
> ===================================================================
> --- eed337ef5e9ae7d62caa84b7974a11fddc7f06e0/include/asm-ia64/ioctl32.h  (mode:100644 sha1:d0d227f45e05d23705ac849f4bd5c06a28288b58)
> +++ 6bb5a1cf91bbda8308ec7e6d900cb89071907dcd/include/asm-ia64/ioctl32.h  (mode:100644 sha1:e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
> @@ -1 +0,0 @@
> -#include <linux/ioctl32.h>
> Index: include/asm-um/elf.h
> ===================================================================
> Index: include/asm-x86_64/apicdef.h
> ===================================================================
> 
> which of course doesn't remove that file at all.
> 
> And I bet that when Linus releases patch-2.6.12-rc5.gz and patch-2.6.12.gz,
> they will have the same construct.  AFAICT, the patch-based people will
> need to download a full new tarball to get rid of this dang file.

Feeding
--- include/asm-um/elf.h
+++ /dev/null
patch to cg-patch would make Cogito kill it. No help for regular patch
though, I fear. Perhaps some artificial timestamp could help the file
removal heuristic in GNU patch. Or passing it -E, but that will
obviously do the wrong thing to any other zero-sized files.

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply

* Re: [PATCH] Constify
From: Petr Baudis @ 2005-05-17 21:27 UTC (permalink / raw)
  To: Morten Welinder; +Cc: GIT Mailing List
In-Reply-To: <118833cc050422110872a6f4a5@mail.gmail.com>

Dear diary, on Fri, Apr 22, 2005 at 08:08:02PM CEST, I got a letter
where Morten Welinder <mwelinder@gmail.com> told me that...
> Hi!

Hello,

> This patch makes strings type "const char *" and keeps people honest.
> [Here's to hoping that nothing in this email setup mangles whitespace...]
> 
> Signed-off-by: Morten Welinder (mwelinder@gmail.com)

thanks. It was somewhat mangled, but the main problem was that it had
rotted for too long in my queue, so only quite a small part of it survived
until today. :-( I've not applied the CFLAGS+=-Wwrite-strings part yet,
since it would require some additional cleanups and such, so I'll let
someone else deal with that. ;-)

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply

* Re: [PATCH] uml: remove elf.h
From: Andrew Morton @ 2005-05-17 21:21 UTC (permalink / raw)
  To: git, linux-kernel; +Cc: Linus Torvalds, Paolo 'Blaisorblade' Giarrusso
In-Reply-To: <200505171704.j4HH4Ne8002532@hera.kernel.org>

Linux Kernel Mailing List <linux-kernel@vger.kernel.org> wrote:
>
> tree a3d85d9f43f64bbd8437c973caf98f79d95b5f3e
> parent a123edab03ac39e08c2f9cb4fc1af07e099c68bc
> author Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Tue, 17 May 2005 11:53:14 -0700
> committer Linus Torvalds <torvalds@ppc970.osdl.org> Tue, 17 May 2005 21:59:11 -0700
> 
> [PATCH] uml: remove elf.h
> 
> Actually remove elf.h in the tree.  The previous patch, due to a quilt
> bug/misuse, left it in the tree as a 0-length file, preventing the build to
> see it as missing and to generate a symlink in its place.
> 
> Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
> Signed-off-by: Andrew Morton <akpm@osdl.org>
> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
> 
>  asm-um/elf.h |    0 
>  1 files changed
> 
> Index: include/asm-um/elf.h

Hot damn, this zero-length file is hard to get rid of.  I pulled Linus's
tree this morning with this bizarre concoction:

	cd $GIT_TREE
	cg-pull origin
	tagsha1=$(cat .git/refs/tags/v$(kversion))
	t=$(cat-file tag $tagsha1 | head -n 1 | sed -e 's/object //')
	cg-diff -r $t -r $(cat .git/refs/heads/origin) > $PULL/linus.patch

and the resulting diff has:

Index: include/asm-ia64/ioctl32.h
===================================================================
--- eed337ef5e9ae7d62caa84b7974a11fddc7f06e0/include/asm-ia64/ioctl32.h  (mode:100644 sha1:d0d227f45e05d23705ac849f4bd5c06a28288b58)
+++ 6bb5a1cf91bbda8308ec7e6d900cb89071907dcd/include/asm-ia64/ioctl32.h  (mode:100644 sha1:e69de29bb2d1d6434b8b29ae775ad8c2e48c5391)
@@ -1 +0,0 @@
-#include <linux/ioctl32.h>
Index: include/asm-um/elf.h
===================================================================
Index: include/asm-x86_64/apicdef.h
===================================================================

which of course doesn't remove that file at all.

And I bet that when Linus releases patch-2.6.12-rc5.gz and patch-2.6.12.gz,
they will have the same construct.  AFAICT, the patch-based people will
need to download a full new tarball to get rid of this dang file.

It all wouldn't really matter much, except apparently the mere presence of
this file breaks the UML build.

Frazzle.  Paolo, I'm almost wondering if we should change that test to also
check for a zero-length file.

^ permalink raw reply

* Re: [PATCH 0/4] Pulling refs files
From: Daniel Barkalow @ 2005-05-17 21:20 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git, Linus Torvalds
In-Reply-To: <20050517201436.GC7136@pasky.ji.cz>

On Tue, 17 May 2005, Petr Baudis wrote:

> Dear diary, on Sun, May 15, 2005 at 05:23:18AM CEST, I got a letter
> where Daniel Barkalow <barkalow@iabervon.org> told me that...
> > On Sat, 14 May 2005, Petr Baudis wrote:
> > 
> > > So what about just something like
> > > 
> > > 	git-wormhole-pull remote:refs/head/master wormhole://localhost/
> > > 
> > > That is, you could just specify remote:path_relative_to_url instead of
> > > SHA1 id as the commit.
> > 
> > Do you have any sensible alternatives to "remote:refs/<something>" in
> > mind? I suppose that "remote:HEAD" would also work. How are you thinking
> > of having the value get written locally?
> 
> Anything that gets eventually wound up in the info/ directory. (The name
> of the ignore file saved in info/ignore is the current hit.)

Hmm... maybe the right thing is to make the implementation-provided
transfer code handle arbitrary things in GIT_DIR, but have code for
updating reference files atomically and using a reference file to start
from use "refs/"? Certainly, there's nothing special about reference files
in transit.

Certainly the things in the info/ directory shouldn't be treated as a head
that you're going to pull, so that has to be different above the protocol
level anyway.

> Well, it'd be again nice to have some generic mechanism for this so that
> the user could theoretically push over rsync too or something (although
> that'll be even more racy, it is fine for single-user repository).

Hmm; I'm not sure what would be good for interfacing with rsync.

> I think the remote file to write the value inside should be porcelain
> business.

Certainly it's porcelain business what remote file to write; but I think
it has to be core business doing the lock, test, and update. I think it
would be inconvenient to go back to the porcelain layer in the middle of
the operation, particularly since it would have to go back to the core,
which is what has the connection to the remote host.

> What you should always check though is that before the pull
> (and after the locking) the value in that file is the same as the "push
> base". This way you make sure that you are still following a single
> branch and in case of multiuser repositories that you were fully merged
> before pushing.

So the remote receiver should get an instruction: change X from OLD to NEW
and pull NEW. It should:

 - lock the file against further updates
 - check that the current value is the provided OLD
 - pull the necessary objects
 - write NEW to the file
 - report success

On failure of any step, it should unlock the file without changing it.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* Re: [PATCH 2/4] Tweak diff output further to make it a bit less distracting.
From: Junio C Hamano @ 2005-05-17 21:19 UTC (permalink / raw)
  To: Petr Baudis; +Cc: Daniel Barkalow, Linus Torvalds, git
In-Reply-To: <20050517211132.GK7136@pasky.ji.cz>

>>>>> "PB" == Petr Baudis <pasky@ucw.cz> writes:

PB> Oops, I've somehow completely missed this mail, but I like this idea a
PB> lot. What do you think, Linus and Junio?

I find the original Linus one easier to read.

^ permalink raw reply

* Re: [PATCH 1/2] Introduce git-run-with-user-path helper program.
From: Junio C Hamano @ 2005-05-17 21:18 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git, torvalds
In-Reply-To: <20050517203500.GH7136@pasky.ji.cz>

>>>>> "PB" == Petr Baudis <pasky@ucw.cz> writes:

PB> Actually, my doubts about general usefulness of this wrapper are
PB> growing. Cogito is unlikely to ever make use of it since it has to
PB> figure out the .git location anyway for own use (it keeps plenty of own
PB> files there).

I think "having to figure out .git anyway" is backwards, if your
plan is to make Cogito take filesystem paths as opposed to GIT
paths.  If the plan for Cogito is to always take GIT paths,
which is a sensible way as well, then it is irrelevant for the
implementation of Cogito (but then it becomes useful for users
of Cogito).

If your plan is to make Cogito take filesystem paths, then you
can move bulk of the code currently in cg-blah, except the part
that picks up non-path parameters, to cg-Xblah, and reduce
cg-blah implementation down to just:

    ... parse options by shifting "$@" out.
    ... then
    git-run-with-user-path cg-Xblah $non-path-opts -- "$@"

and you can rip "the code to figure out .git" out from cg-Xblah.
There is nothing to figure out at that point; it always is
${GIT_DIR-.git}/.

^ permalink raw reply

* Re: [PATCH 2/4] Tweak diff output further to make it a bit less distracting.
From: Petr Baudis @ 2005-05-17 21:11 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: Linus Torvalds, Junio C Hamano, git
In-Reply-To: <Pine.LNX.4.21.0505161955340.30848-100000@iabervon.org>

Dear diary, on Tue, May 17, 2005 at 02:10:35AM CEST, I got a letter
where Daniel Barkalow <barkalow@iabervon.org> told me that...
> On Mon, 16 May 2005, Linus Torvalds wrote:
> 
> > One final note: I actually think that "rename patches" make a ton of 
> > sense, even if git itself doesn't track renames. If we ever have a "smart 
> > diff" thing that can generate inter-file diffs, I'd like to eventually see
> > 
> > 	diff -git a/kernel/sched.c b/kernel/sched.c.old
> > 	rename kernel/sched.c kernel/sched.c.old
> > 	old mode 100644
> > 	new mode 100755
> 
> I'd like something like:
> 
> diff -git a/kernel/sched.c b/kernel/sched.c.old
> filename -- kernel/sched.c
> filename ++ kernel/sched.c.old
> mode -- 100644
> mode ++ 100755
> --- a/kernel/sched.c
> +++ b/kernel/sched.c.old
> @@ -1,5 +1,5 @@
> (etc.)
> 
> because I actually start thinking of the two sides as "-" and "+", and I'd
> actually have to think about which is "old" and which is "new", and which
> way the "rename" line goes, and so forth. I'd actually be happier with
> just a "mode -- 100644" line for a deleted file, also. If I'm looking at a
> patch, and I read Makefile with '-' and '+' versions of the lists of
> objects, and then get to a "new file" line, I have to think about it to
> associate the '+' side with having the file and the '-' side with not
> having it.

Oops, I've somehow completely missed this mail, but I like this idea a
lot. What do you think, Linus and Junio?

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
C++: an octopus made by nailing extra legs onto a dog. -- Steve Taylor

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox