Git development

Git development
 help / color / mirror / Atom feed

* [PATCH] Make diff-cache and friends output more cg-patch friendly.
From: Junio C Hamano @ 2005-04-28  6:28 UTC (permalink / raw)
  To: Linus Torvalds, Petr Baudis; +Cc: Andrew Morton, git
In-Reply-To: <7vhdhra2sg.fsf@assigned-by-dhcp.cox.net>

This patch changes the way the default arguments to diff are
built when diff-cache and friends are invoked with -p and there
is no GIT_EXTERNAL_DIFF environment variable.  It attempts to be
more cg-patch friendly by:

 - Showing diffs against /dev/null to denote added or removed
   files;

 - Showing file modes for existing files as a comment after the
   diff label.

Unfortunately with this change GIT_DIFF_CMD customization cannot
be supported easily anymore, so it has been dropped.
GIT_DIFF_OPTS customization to change diffs from unified to
context is still there, though.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

diff.c |   56 ++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 36 insertions(+), 20 deletions(-)

# - 04/27 21:50 diff.c clean up temporary file.
# + 04/27 23:18 Attempt to minimally be compatible with cg-Xdiffdo.
--- k/diff.c  (mode:100644)
+++ l/diff.c  (mode:100644)
@@ -7,7 +7,6 @@
 #include "cache.h"
 #include "diff.h"
 
-static char *diff_cmd = "diff -L'k/%s' -L'l/%s'";
 static char *diff_opts = "-pu";
 
 static const char *external_diff(void)
@@ -24,14 +23,12 @@ static const char *external_diff(void)
 	 * alternative styles you can specify via environment
 	 * variables are:
 	 *
-	 * GIT_DIFF_CMD="diff -L '%s' -L '%s'"
 	 * GIT_DIFF_OPTS="-c";
 	 */
 	if (getenv("GIT_EXTERNAL_DIFF"))
 		external_diff_cmd = getenv("GIT_EXTERNAL_DIFF");
 
 	/* In case external diff fails... */
-	diff_cmd = getenv("GIT_DIFF_CMD") ? : diff_cmd;
 	diff_opts = getenv("GIT_DIFF_OPTS") ? : diff_opts;
 
 	done_preparing = 1;
@@ -84,31 +81,50 @@ static struct diff_tempfile {
 static void builtin_diff(const char *name,
 			 struct diff_tempfile *temp)
 {
-	static char *diff_arg  = "'%s' '%s'";
-	const char *name_1_sq = sq_expand(temp[0].name);
-	const char *name_2_sq = sq_expand(temp[1].name);
+	int i, next_at;
+	const char *diff_cmd = "diff -L'%s%s%s' -L'%s%s%s'";
+	const char *diff_arg  = "'%s' '%s'";
+	const char *input_name_sq[2];
+	const char *path0[2];
+	const char *path1[2];
+	char mode[2][20];
 	const char *name_sq = sq_expand(name);
-
-	/* diff_cmd and diff_arg have 4 %s in total which makes
-	 * the sum of these strings 8 bytes larger than required.
+	char *cmd;
+	
+	/* diff_cmd and diff_arg have 8 %s in total which makes
+	 * the sum of these strings 16 bytes larger than required.
 	 * we use 2 spaces around diff-opts, and we need to count
-	 * terminating NUL, so we subtract 5 here.
+	 * terminating NUL, so we subtract 13 here.
 	 */
-	int cmd_size = (strlen(diff_cmd) + 
-			strlen(name_sq) * 2 +
-			strlen(diff_opts) +
-			strlen(diff_arg) +
-			strlen(name_1_sq) + strlen(name_2_sq)
-			- 5);
-	char *cmd = xmalloc(cmd_size);
-	int next_at = 0;
+	int cmd_size = (strlen(diff_cmd) + strlen(diff_opts) +
+			strlen(diff_arg) - 13);
+	for (i = 0; i < 2; i++) {
+		input_name_sq[i] = sq_expand(temp[i].name);
+		if (!strcmp(temp[i].name, "/dev/null")) {
+			path0[i] = "/dev/null";
+			path1[i] = "";
+			mode[i][0] = 0;
+		} else {
+			path0[i] = i ? "l/" : "k/";
+			path1[i] = name_sq;
+			sprintf(mode[i], "  (mode:%s)", temp[i].mode);
+		}
+		cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
+			     strlen(mode[i]) + strlen(input_name_sq[i]));
+	}
+
+	cmd = xmalloc(cmd_size);
 
+	next_at = 0;
 	next_at += snprintf(cmd+next_at, cmd_size-next_at,
-			    diff_cmd, name_sq, name_sq);
+			    diff_cmd,
+			    path0[0], path1[0], mode[0],
+			    path0[1], path1[1], mode[1]);
 	next_at += snprintf(cmd+next_at, cmd_size-next_at,
 			    " %s ", diff_opts);
 	next_at += snprintf(cmd+next_at, cmd_size-next_at,
-			    diff_arg, name_1_sq, name_2_sq);
+			    diff_arg, input_name_sq[0], input_name_sq[1]);
+
 	execlp("/bin/sh","sh", "-c", cmd, NULL);
 }
 


^ permalink raw reply

* [3'/5] Add function to parse an object of unspecified type (take 2)
From: Daniel Barkalow @ 2005-04-28  6:01 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280106540.30848-100000@iabervon.org>

This adds a function that parses an object from the database when we have
to look up its actual type. It also checks the hash of the file, due to
its heritage as part of fsck-cache.

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: object.c
===================================================================
--- bcb43fecd82fa6f851e266b7e3873973068d51f3/object.c  (mode:100644 sha1:91bbc6e5e2eadfb0a66b14d992eac260d07267f8)
+++ 52505514b78c9bd77fcd701663c1967919d1cec3/object.c  (mode:100644 sha1:ca4af8fa2dc0672b92310a3ebdd4d14bf070dd69)
@@ -1,5 +1,9 @@
 #include "object.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
 #include "cache.h"
+#include "tag.h"
 #include <stdlib.h>
 #include <string.h>
 
@@ -94,3 +98,39 @@
 		p = p->next;
 	}
 }
+
+struct object *parse_object(unsigned char *sha1)
+{
+	unsigned long mapsize;
+	void *map = map_sha1_file(sha1, &mapsize);
+	if (map) {
+		char type[100];
+		unsigned long size;
+		void *buffer = unpack_sha1_file(map, mapsize, type, &size);
+		if (!buffer)
+			return NULL;
+		if (check_sha1_signature(sha1, buffer, size, type) < 0)
+			printf("sha1 mismatch %s\n", sha1_to_hex(sha1));
+		munmap(map, mapsize);
+		if (!strcmp(type, "blob")) {
+			struct blob *ret = lookup_blob(sha1);
+			parse_blob(ret);
+			return &ret->object;
+		} else if (!strcmp(type, "tree")) {
+			struct tree *ret = lookup_tree(sha1);
+			parse_tree(ret);
+			return &ret->object;
+		} else if (!strcmp(type, "commit")) {
+			struct commit *ret = lookup_commit(sha1);
+			parse_commit(ret);
+			return &ret->object;
+		} else if (!strcmp(type, "tag")) {
+			struct tag *ret = lookup_tag(sha1);
+			parse_tag(ret);
+			return &ret->object;
+		} else {
+			return NULL;
+		}
+	}
+	return NULL;
+}
Index: object.h
===================================================================
--- bcb43fecd82fa6f851e266b7e3873973068d51f3/object.h  (mode:100644 sha1:bc607fd55f6ce4e56ce87766369b5d4d55ec79af)
+++ 52505514b78c9bd77fcd701663c1967919d1cec3/object.h  (mode:100644 sha1:d53a35a4d7321b5ec970103208ac576f9f722dff)
@@ -22,6 +22,9 @@
 
 void created_object(unsigned char *sha1, struct object *obj);
 
+/** Returns the object, having parsed it to find out what it is. **/
+struct object *parse_object(unsigned char *sha1);
+
 void add_ref(struct object *refer, struct object *target);
 
 void mark_reachable(struct object *obj, unsigned int mask);


^ permalink raw reply

* [1'/5] Mark blobs as parsed when they're actually parsed
From: Daniel Barkalow @ 2005-04-28  5:58 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280131400.30848-100000@iabervon.org>

This eliminates the special case for blobs versus other types of
objects. Now the scheme is entirely regular and I won't introduce stupid
bugs. (And fsck-cache doesn't have to do the do-nothing parse)

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: blob.c
===================================================================
--- 89fdfd09b281fdf5071bc13a30ef683bd6851b61/blob.c  (mode:100644 sha1:3d99b93f020d84c5410c2b1056f2d7446b647d1e)
+++ bcb43fecd82fa6f851e266b7e3873973068d51f3/blob.c  (mode:100644 sha1:d4af4a309433744d2fe819886d66741ab016f62b)
@@ -14,10 +14,28 @@
 		ret->object.type = blob_type;
 		return ret;
 	}
-	if (obj->parsed && obj->type != blob_type) {
+	if (obj->type != blob_type) {
 		error("Object %s is a %s, not a blob", 
 		      sha1_to_hex(sha1), obj->type);
 		return NULL;
 	}
 	return (struct blob *) obj;
 }
+
+int parse_blob(struct blob *item)
+{
+        char type[20];
+        void *buffer;
+        unsigned long size;
+        if (item->object.parsed)
+                return 0;
+        item->object.parsed = 1;
+        buffer = read_sha1_file(item->object.sha1, type, &size);
+        if (!buffer)
+                return error("Could not read %s",
+                             sha1_to_hex(item->object.sha1));
+        if (strcmp(type, blob_type))
+                return error("Object %s not a blob",
+                             sha1_to_hex(item->object.sha1));
+	return 0;
+}
Index: blob.h
===================================================================
--- 89fdfd09b281fdf5071bc13a30ef683bd6851b61/blob.h  (mode:100644 sha1:5cbf6d65ee88e1e0c0f1153af4aa7f80c3c48c16)
+++ bcb43fecd82fa6f851e266b7e3873973068d51f3/blob.h  (mode:100644 sha1:4afad0f067e1979d7ccc4778e254cce6962ad136)
@@ -11,4 +11,6 @@
 
 struct blob *lookup_blob(unsigned char *sha1);
 
+int parse_blob(struct blob *item);
+
 #endif /* BLOB_H */


^ permalink raw reply

* Re: [ANNOUNCE] gitkdiff 0.1
From: Greg KH @ 2005-04-28  5:49 UTC (permalink / raw)
  To: Tejun Heo; +Cc: git
In-Reply-To: <4270711F.7020501@gmail.com>

On Thu, Apr 28, 2005 at 02:14:07PM +0900, Tejun Heo wrote:
> 
>  Hello, guys.
> 
>  I've hacked tkdiff and made a commit viewing utility.  Just download
> the following tarball and unpack it wherever PATH points to.  It
> assumes that all base git executables are visible via PATH.
> 
>  http://home-tj.org/gitui/files/gitui-200504281405.tar.gz

Very nice, I like it a lot, thanks for doing this.

greg k-h

^ permalink raw reply

* Re: [1/5] Consider a blob to be parsed
From: Daniel Barkalow @ 2005-04-28  5:49 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.58.0504272217240.18901@ppc970.osdl.org>

On Wed, 27 Apr 2005, Linus Torvalds wrote:

> 
> 
> On Thu, 28 Apr 2005, Daniel Barkalow wrote:
> >
> > We don't parse blobs at all, so any that we've got are as parsed as
> > they're going to get. Don't make fsck-cache mark them.
> 
> NO NO NO!
> 
> This is WRONG, dammit. I fixed it once, you are re-introducing the same
> bug.


> Daniel, the problem is that you parse them only when you SEE them, and 
> that is totally different from having seen a REFERENCE to them. One says 
> "I've seen this object", the other says "I _want_ to see this object". 
> They are two totally different things.

Good point; the right fix is actually to have a parse_blob that checks to
make sure it's there, and not have a special case for the simple case.

Replacement [1/5] to follow...

	-Daniel
*This .sig left intentionally blank*


^ permalink raw reply

* Re: Cogito Tutorial If It Helps
From: Alan Chandler @ 2005-04-28  5:40 UTC (permalink / raw)
  To: git; +Cc: James Purser
In-Reply-To: <200504272315.22939.alan@chandlerfamily.org.uk>

On Wednesday 27 April 2005 23:15, Alan Chandler wrote:
> On Wednesday 27 April 2005 20:32, Petr Baudis wrote:

> > You didn't do make install and you don't have the cogito tree in your
> > $PATH.
>
> I DID do a make install - which put everything in ~/bin (including
> cg-Xdiffdo) and ~/bin is the first item in my $PATH.

Thanks to James Purser it's fixed.

I needed the full path to ~/bin rather than just using the ~ in $PATH.
-- 
Alan Chandler
http://www.chandlerfamily.org.uk

^ permalink raw reply

* Re: I'm missing isofs.h
From: Junio C Hamano @ 2005-04-28  5:27 UTC (permalink / raw)
  To: Petr Baudis; +Cc: Linus Torvalds, Andrew Morton, git
In-Reply-To: <20050428003246.GV22956@pasky.ji.cz>

>>>>> "PB" == Petr Baudis <pasky@ucw.cz> writes:

PB> Actually, I can't; the patch generator is not on par with mine yet.
PB> It does not show modes and does not indicate file adds/removals by
PB> /dev/null - basically, I need something cg-patch can eat (and it should
PB> be backwards compatible). I think throwing the sha1 hashes away will not
PB> harm; I got used to the Index: field and === marker, but I don't care if
PB> I lose it.

I've looked at what cg-Xdiffdo does.  From the above paragraph,
I sense that it does more than what cg-patch requires, so I took
a look at cg-patch, too.  

Can you help me verify if I understand the requirements cg-patch
has on its input correctly?

 - Follow the convention of showing newly added files with
   "--- /dev/null" and removed files with "+++ /dev/null";

 - Label matches this Perl regexp:

     m|^(---|\+\+\+)\s+[^/]+\/(\S+)\s+.*mode:([0-7]{3,}).*/|

   and you only care about sign ($1), filename ($2) and mode ($3).

To illustrate, cg-Xdiffdo generates something like:

 (modified files)
 --- FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/fs/ext3/Makefile  (mode:0644)
 +++ EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE/fs/ext3/Makefile  (mode:0664)

 (deleted files)
 --- FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/fs/ext3/Makefile  (mode:0644)
 +++ /dev/null  (tree:EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE)

 (added files)
 --- /dev/null  (tree:EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE)
 +++ FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF/fs/ext3/Makefile  (mode:0644)

but they could be like the following to satisfy cg-patch:

 (modified files)
 --- a/fs/ext3/Makefile  (mode:0644)
 +++ b/fs/ext3/Makefile  (mode:0664)

 (deleted files)
 --- a/fs/ext3/Makefile  (mode:0644)
 +++ /dev/null

 (added files)
 --- /dev/null
 +++ b/fs/ext3/Makefile  (mode:0644)

Is my understanding correct?  If so it should not be too much
work to generate something like it from within the builtin
stuff.

Provided that is what the kernel folks can live with (I do
see why the tool wants the mode bits, but it is unusual to see
non-timestamp strings after filenames).

Linus & Andrew, is the above (second) format acceptable for the
kernel work?

^ permalink raw reply

* Re: [1/5] Consider a blob to be parsed
From: Linus Torvalds @ 2005-04-28  5:28 UTC (permalink / raw)
  To: Daniel Barkalow; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280100360.30848-100000@iabervon.org>

On Thu, 28 Apr 2005, Daniel Barkalow wrote:
>
> We don't parse blobs at all, so any that we've got are as parsed as
> they're going to get. Don't make fsck-cache mark them.

NO NO NO!

This is WRONG, dammit. I fixed it once, you are re-introducing the same
bug.

Daniel, the problem is that you parse them only when you SEE them, and 
that is totally different from having seen a REFERENCE to them. One says 
"I've seen this object", the other says "I _want_ to see this object". 
They are two totally different things.

You now mark all "blob" objects parsed regardless of whether you have 
actually seen the blob or not. Ie you mark a blob parsed just from having 
seen a _reference_ to it, and fsck can never know whether it actually 
really saw the object or not.

This is the commit that already fixed this bug once, and that you are now 
re-introducing:

	commit 4728b861ace127dc39c648f3bea64c3b86bbabc5
	tree 242227fc3c3a74d070ed36496e790335dd00c44a
	parent da6abf5d9c342a74dffbcc2015b9c27d7819a900
	author Linus Torvalds <torvalds@ppc970.osdl.org> Sun, 24 Apr 2005 14:10:55 -0700
	committer Linus Torvalds <torvalds@ppc970.osdl.org> Sun, 24 Apr 2005 14:10:55 -0700

	    fsck-cache: notice missing "blob" objects.

	    We should _not_ mark a blob object "parsed" just because we
	    looked it up: it gets marked that way only once we've actually
	    seen it. Otherwise we can never notice a missing blob.

please think about it.

Try to make some test-cases for fsck. They are quite easy to make: copy a 
good directory, and 
 - remove one commit (in the middle)
 - remove at least one tree
 - remove at least one blob
 - corrupt a file obviously (make it not uncompress ok)
 - corrupt a file in a subtle way (make it uncompress ok and have the
   right signature, but be the wrong type - for example, make a "commit" 
   object that points to a "tree" object that actually is a "blob")

And you'll see how this "consider a blob parsed" totally destroys fsck's 
ability to notice that the blob doesn't even _exist_ any more (case 3 
above).

"parsing" and "looking up" are two totally independent operations. They
are independent for commits and trees, and they are independent for blobs.  

To mark a blob parsed, you _need_ to have actually looked it up and
verified that it exists and that the object header is valid (and if you're 
fsck, that the sha1 matches). You MUST NOT do it in "lookup_blob()".

		Linus

^ permalink raw reply

* [5/5] Make fsck-cache read heads out of .git/refs/*/*
From: Daniel Barkalow @ 2005-04-28  5:17 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280055180.30848-100000@iabervon.org>

This makes fsck-cache expect the root set of the reachability graph (i.e.,
your heads and tags) to be .git/refs/*/*, which contain hex versions of
hashes of roots (like .git/HEAD has always been).

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: fsck-cache.c
===================================================================
--- ca8a271be1370ab0795a869c15114e566bdd15d8/fsck-cache.c  (mode:100644 sha1:280a104050b665515418c00c33af8e6b0b0e2101)
+++ c53109e78c0b0d8925a4198fdec8295620f8f349/fsck-cache.c  (mode:100644 sha1:c4ff5bf3d71c35236e769d30035b00acf08d452b)
@@ -135,6 +135,67 @@
 	return 0;
 }
 
+int read_refs_dirs(char *path)
+{
+	DIR *dir = opendir(path);
+	char filename[NAME_MAX+1];
+	struct dirent *de;
+	int total = 0;
+	char *base;
+
+	if (!dir) {
+		error("missing refs directory '%s'", path);
+		return 0;
+	}
+	strcpy(filename, path);
+	base = filename + strlen(path);
+	*(base++) = '/';
+	while ((de = readdir(dir)) != NULL) {
+		DIR *subdir;
+		char *subbase;
+		if (de->d_name[0] == '.')
+			continue;
+		strcpy(base, de->d_name);
+		subbase = base + strlen(de->d_name);
+		subdir = opendir(filename);
+		if (!subdir) {
+			fprintf(stderr, "can't open subdir %s\n", filename);
+			continue;
+		}
+		*(subbase++) = '/';
+		while ((de = readdir(subdir)) != NULL) {
+			char hex[41];
+			char sha1[20];
+			int fd;
+			struct object *obj;
+			if (de->d_name[0] == '.')
+				continue;
+			strcpy(subbase, de->d_name);
+			fd = open(filename, O_RDONLY);
+			if (fd < 0) {
+				fprintf(stderr, "Couldn't open %s\n", filename);
+				continue;
+			}
+			if ((read(fd, hex, 41) < 41) ||
+			    (hex[40] != '\n') ||
+			    get_sha1_hex(hex, sha1)) {
+				fprintf(stderr, "Couldn't read a hash from %s\n",
+					filename);
+				continue;
+			}
+			obj = parse_object(sha1);
+			obj->used = 1;
+			mark_reachable(obj, REACHABLE);
+			
+			total++;
+		}
+		closedir(subdir);
+	}
+	closedir(dir);
+
+	return total;
+}
+
 int main(int argc, char **argv)
 {
 	int i, heads;
@@ -183,6 +244,8 @@
 		error("expected sha1, got %s", arg);
 	}
 
+	heads += read_refs_dirs(".git/refs");
+
 	if (!heads) {
 		if (show_unreachable) {
 			fprintf(stderr, "unable to do reachability without a head\n");


^ permalink raw reply

* [ANNOUNCE] gitkdiff 0.1
From: Tejun Heo @ 2005-04-28  5:14 UTC (permalink / raw)
  To: git


 Hello, guys.

 I've hacked tkdiff and made a commit viewing utility.  Just download
the following tarball and unpack it wherever PATH points to.  It
assumes that all base git executables are visible via PATH.

 http://home-tj.org/gitui/files/gitui-200504281405.tar.gz

$ gitkdiff -h
/home/tj/bin/gitkdiff: illegal option -- h
GIT tkdiff - gitkdiff 0.1

Usage: gitkdiff [OPTS...] DIFFSPEC

OPTS are
    -h                      prints this help message and exit

DIFFSPEC can be one of
    [files...]              the current cache vs. working files
    -r R [files...]         files in commit R's parent vs. files in commit R
    -r R0 -r R1 [files...]  files in commit R0 vs. files in commit R1

If no file is specified, all modified files are shown.

-- 
tejun

^ permalink raw reply

* [4/5] Rework fsck-cache to use parse_object()
From: Daniel Barkalow @ 2005-04-28  5:13 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280055180.30848-100000@iabervon.org>

With support for parse_object() and tags in the core, fsck_cache can just
call them, and can be simplified a bit.

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: fsck-cache.c
===================================================================
--- dd06c679f8acb720976073de90a9eeabbc11e951/fsck-cache.c  (mode:100644 sha1:f9b1431dd8f4f3b426a7e410de952277aaa11401)
+++ ca8a271be1370ab0795a869c15114e566bdd15d8/fsck-cache.c  (mode:100644 sha1:280a104050b665515418c00c33af8e6b0b0e2101)
@@ -6,6 +6,7 @@
 #include "commit.h"
 #include "tree.h"
 #include "blob.h"
+#include "tag.h"
 
 #define REACHABLE 0x0001
 
@@ -21,6 +22,7 @@
 	/* Look up all the requirements, warn about missing objects.. */
 	for (i = 0; i < nr_objs; i++) {
 		struct object *obj = objs[i];
+		struct object_list *refs;
 
 		if (show_unreachable && !(obj->flags & REACHABLE)) {
 			printf("unreachable %s %s\n", obj->type, sha1_to_hex(obj->sha1));
@@ -35,101 +37,49 @@
 			printf("dangling %s %s\n", obj->type, 
 			       sha1_to_hex(obj->sha1));
 		}
+		for (refs = obj->refs; refs; refs = refs->next) {
+			if (!refs->item->parsed) {
+				printf("broken link from %s\n",
+				       sha1_to_hex(obj->sha1));
+				printf("              to %s\n",
+				       sha1_to_hex(refs->item->sha1));
+			}
+		}
 	}
 }
 
-static int fsck_tree(unsigned char *sha1, void *data, unsigned long size)
+static int fsck_tree(struct tree *item)
 {
-	struct tree *item = lookup_tree(sha1);
-	if (parse_tree(item))
-		return -1;
 	if (item->has_full_path) {
 		fprintf(stderr, "warning: fsck-cache: tree %s "
-			"has full pathnames in it\n", sha1_to_hex(sha1));
+			"has full pathnames in it\n", 
+			sha1_to_hex(item->object.sha1));
 	}
 	return 0;
 }
 
-static int fsck_commit(unsigned char *sha1, void *data, unsigned long size)
+static int fsck_commit(struct commit *commit)
 {
-	struct commit *commit = lookup_commit(sha1);
-	if (parse_commit(commit))
-		return -1;
 	if (!commit->tree)
 		return -1;
 	if (!commit->parents && show_root)
-		printf("root %s\n", sha1_to_hex(sha1));
+		printf("root %s\n", sha1_to_hex(commit->object.sha1));
 	if (!commit->date)
-		printf("bad commit date in %s\n", sha1_to_hex(sha1));
-	return 0;
-}
-
-static int fsck_blob(unsigned char *sha1, void *data, unsigned long size)
-{
-	struct blob *blob = lookup_blob(sha1);
-	blob->object.parsed = 1;
+		printf("bad commit date in %s\n", 
+		       sha1_to_hex(commit->object.sha1));
 	return 0;
 }
 
-static int fsck_tag(unsigned char *sha1, void *data, unsigned long size)
+static int fsck_tag(struct tag *tag)
 {
-	int typelen, taglen;
-	unsigned char object[20];
-	char object_hex[60];
-	const char *type_line, *tag_line, *sig_line;
-
-	if (size < 64)
-		return -1;
-	if (memcmp("object ", data, 7) || get_sha1_hex(data + 7, object))
-		return -1;
-
-	type_line = data + 48;
-	if (memcmp("\ntype ", type_line-1, 6))
-		return -1;
-
-	tag_line = strchr(type_line, '\n');
-	if (!tag_line || memcmp("tag ", ++tag_line, 4))
-		return -1;
-
-	sig_line = strchr(tag_line, '\n');
-	if (!sig_line)
-		return -1;
-	sig_line++;
-
-	typelen = tag_line - type_line - strlen("type \n");
-	if (typelen >= 20)
-		return -1;
-	taglen = sig_line - tag_line - strlen("tag \n");
-
 	if (!show_tags)
 		return 0;
 
-	strcpy(object_hex, sha1_to_hex(object));
-	printf("tagged %.*s %s (%.*s) in %s\n",
-		typelen, type_line + 5,
-		object_hex,
-		taglen, tag_line + 4,
-		sha1_to_hex(sha1));
-	return 0;
-}
-
-static int fsck_entry(unsigned char *sha1, char *tag, void *data, 
-		      unsigned long size)
-{
-	if (!strcmp(tag, "blob")) {
-		if (fsck_blob(sha1, data, size) < 0)
-			return -1;
-	} else if (!strcmp(tag, "tree")) {
-		if (fsck_tree(sha1, data, size) < 0)
-			return -1;
-	} else if (!strcmp(tag, "commit")) {
-		if (fsck_commit(sha1, data, size) < 0)
-			return -1;
-	} else if (!strcmp(tag, "tag")) {
-		if (fsck_tag(sha1, data, size) < 0)
-			return -1;
-	} else
-		return -1;
+	printf("tagged %s %s",
+	       tag->tagged->type,
+	       sha1_to_hex(tag->tagged->sha1));
+	printf(" (%s) in %s\n",
+	       tag->tag, sha1_to_hex(tag->object.sha1));
 	return 0;
 }
 
@@ -137,20 +87,17 @@
 {
 	unsigned char sha1[20];
 	if (!get_sha1_hex(hex, sha1)) {
-		unsigned long mapsize;
-		void *map = map_sha1_file(sha1, &mapsize);
-		if (map) {
-			char type[100];
-			unsigned long size;
-			void *buffer = unpack_sha1_file(map, mapsize, type, &size);
-			if (!buffer)
-				return -1;
-			if (check_sha1_signature(sha1, buffer, size, type) < 0)
-				printf("sha1 mismatch %s\n", sha1_to_hex(sha1));
-			munmap(map, mapsize);
-			if (!fsck_entry(sha1, type, buffer, size))
-				return 0;
-		}
+		struct object *obj = parse_object(sha1);
+		if (!obj)
+			return -1;
+		if (obj->type == blob_type)
+			return 0;
+		if (obj->type == tree_type)
+			return fsck_tree((struct tree *) obj);
+		if (obj->type == commit_type)
+			return fsck_commit((struct commit *) obj);
+		if (obj->type == tag_type)
+			return fsck_tag((struct tag *) obj);
 	}
 	return -1;
 }


^ permalink raw reply

* [3/5] Add function to parse an object of unspecified type
From: Daniel Barkalow @ 2005-04-28  5:11 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280055180.30848-100000@iabervon.org>

This adds a function that parses an object from the database when we have
to look up its actual type. It also checks the hash of the file, due to
its heritage as part of fsck-cache.

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: object.c
===================================================================
--- e08498de8d0216a24378c454597ecb38f65c4ee1/object.c  (mode:100644 sha1:91bbc6e5e2eadfb0a66b14d992eac260d07267f8)
+++ dd06c679f8acb720976073de90a9eeabbc11e951/object.c  (mode:100644 sha1:787cda1f5667cdd4e92b7c18eb115cf125f6df9c)
@@ -1,5 +1,9 @@
 #include "object.h"
+#include "blob.h"
+#include "tree.h"
+#include "commit.h"
 #include "cache.h"
+#include "tag.h"
 #include <stdlib.h>
 #include <string.h>
 
@@ -94,3 +98,39 @@
 		p = p->next;
 	}
 }
+
+struct object *parse_object(unsigned char *sha1)
+{
+	unsigned long mapsize;
+	void *map = map_sha1_file(sha1, &mapsize);
+	if (map) {
+		char type[100];
+		unsigned long size;
+		void *buffer = unpack_sha1_file(map, mapsize, type, &size);
+		if (!buffer)
+			return NULL;
+		if (check_sha1_signature(sha1, buffer, size, type) < 0)
+			printf("sha1 mismatch %s\n", sha1_to_hex(sha1));
+		munmap(map, mapsize);
+		if (!strcmp(type, "blob")) {
+			struct blob *ret = lookup_blob(sha1);
+			//parse_blob(ret);
+			return &ret->object;
+		} else if (!strcmp(type, "tree")) {
+			struct tree *ret = lookup_tree(sha1);
+			parse_tree(ret);
+			return &ret->object;
+		} else if (!strcmp(type, "commit")) {
+			struct commit *ret = lookup_commit(sha1);
+			parse_commit(ret);
+			return &ret->object;
+		} else if (!strcmp(type, "tag")) {
+			struct tag *ret = lookup_tag(sha1);
+			parse_tag(ret);
+			return &ret->object;
+		} else {
+			return NULL;
+		}
+	}
+	return NULL;
+}
Index: object.h
===================================================================
--- e08498de8d0216a24378c454597ecb38f65c4ee1/object.h  (mode:100644 sha1:bc607fd55f6ce4e56ce87766369b5d4d55ec79af)
+++ dd06c679f8acb720976073de90a9eeabbc11e951/object.h  (mode:100644 sha1:d53a35a4d7321b5ec970103208ac576f9f722dff)
@@ -22,6 +22,9 @@
 
 void created_object(unsigned char *sha1, struct object *obj);
 
+/** Returns the object, having parsed it to find out what it is. **/
+struct object *parse_object(unsigned char *sha1);
+
 void add_ref(struct object *refer, struct object *target);
 
 void mark_reachable(struct object *obj, unsigned int mask);


^ permalink raw reply

* [2/5] Add tag header/parser to library
From: Daniel Barkalow @ 2005-04-28  5:06 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280055180.30848-100000@iabervon.org>

This adds preliminary support for tags in the library. It doesn't even
store the signature, however, let alone provide any way of checking it.

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: Makefile
===================================================================
--- 40a5a5657d67569bdc0e37b8ea2b76ec42824604/Makefile  (mode:100644 sha1:2d2913b6b98ac836b43755b1304d2a838dad87dd)
+++ e08498de8d0216a24378c454597ecb38f65c4ee1/Makefile  (mode:100644 sha1:b533a2834da564f2fc8d00680aeac59c9d197bd8)
@@ -25,7 +25,7 @@
 install: $(PROG) $(SCRIPTS)
 	install $(PROG) $(SCRIPTS) $(HOME)/bin/
 
-LIB_OBJS=read-cache.o sha1_file.o usage.o object.o commit.o tree.o blob.o
+LIB_OBJS=read-cache.o sha1_file.o usage.o object.o commit.o tree.o blob.o tag.o
 LIB_FILE=libgit.a
 LIB_H=cache.h object.h
 
Index: tag.c
===================================================================
--- /dev/null  (tree:40a5a5657d67569bdc0e37b8ea2b76ec42824604)
+++ e08498de8d0216a24378c454597ecb38f65c4ee1/tag.c  (mode:100644 sha1:f079d83a9de5c3d6d98b2a29339ec1ef3a25f333)
@@ -0,0 +1,73 @@
+#include "tag.h"
+#include "cache.h"
+
+const char *tag_type = "tag";
+
+struct tag *lookup_tag(unsigned char *sha1)
+{
+        struct object *obj = lookup_object(sha1);
+        if (!obj) {
+                struct tag *ret = xmalloc(sizeof(struct tag));
+                memset(ret, 0, sizeof(struct tag));
+                created_object(sha1, &ret->object);
+                ret->object.type = tag_type;
+                return ret;
+        }
+        if (obj->type != tag_type) {
+                error("Object %s is a %s, not a tree", 
+                      sha1_to_hex(sha1), obj->type);
+                return NULL;
+        }
+        return (struct tag *) obj;
+}
+
+int parse_tag(struct tag *item)
+{
+        char type[20];
+        void *data, *bufptr;
+        unsigned long size;
+        if (item->object.parsed)
+                return 0;
+        item->object.parsed = 1;
+        data = bufptr = read_sha1_file(item->object.sha1, type, &size);
+        if (!data)
+                return error("Could not read %s",
+                             sha1_to_hex(item->object.sha1));
+        if (strcmp(type, tag_type))
+                return error("Object %s not a tag",
+                             sha1_to_hex(item->object.sha1));
+
+	int typelen, taglen;
+	unsigned char object[20];
+	const char *type_line, *tag_line, *sig_line;
+
+	if (size < 64)
+		return -1;
+	if (memcmp("object ", data, 7) || get_sha1_hex(data + 7, object))
+		return -1;
+
+	item->tagged = parse_object(object);
+
+	type_line = data + 48;
+	if (memcmp("\ntype ", type_line-1, 6))
+		return -1;
+
+	tag_line = strchr(type_line, '\n');
+	if (!tag_line || memcmp("tag ", ++tag_line, 4))
+		return -1;
+
+	sig_line = strchr(tag_line, '\n');
+	if (!sig_line)
+		return -1;
+	sig_line++;
+
+	typelen = tag_line - type_line - strlen("type \n");
+	if (typelen >= 20)
+		return -1;
+	taglen = sig_line - tag_line - strlen("tag \n");
+	item->tag = xmalloc(taglen + 1);
+	memcpy(item->tag, tag_line + 4, taglen);
+	item->tag[taglen] = '\0';
+
+	return 0;
+}
Index: tag.h
===================================================================
--- /dev/null  (tree:40a5a5657d67569bdc0e37b8ea2b76ec42824604)
+++ e08498de8d0216a24378c454597ecb38f65c4ee1/tag.h  (mode:100644 sha1:7ae7864d8a8477bfd22d4e29d78119d345f2bbb9)
@@ -0,0 +1,15 @@
+#ifndef TAG_H
+#define TAG_H
+
+#include "object.h"
+
+extern const char *tag_type;
+
+struct tag {
+	struct object object;
+	struct object *tagged;
+	char *tag;
+	char *signature; /* not actually implemented */
+};
+
+#endif /* TAG_H */


^ permalink raw reply

* [1/5] Consider a blob to be parsed
From: Daniel Barkalow @ 2005-04-28  5:04 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <Pine.LNX.4.21.0504280055180.30848-100000@iabervon.org>

We don't parse blobs at all, so any that we've got are as parsed as
they're going to get. Don't make fsck-cache mark them.

Signed-Off-By: Daniel Barkalow <barkalow@iabervon.org>
Index: blob.c
===================================================================
--- 89fdfd09b281fdf5071bc13a30ef683bd6851b61/blob.c  (mode:100644 sha1:3d99b93f020d84c5410c2b1056f2d7446b647d1e)
+++ 40a5a5657d67569bdc0e37b8ea2b76ec42824604/blob.c  (mode:100644 sha1:2fad8a5eb4c553190736870519f65c265dfb8526)
@@ -12,6 +12,7 @@
 		memset(ret, 0, sizeof(struct blob));
 		created_object(sha1, &ret->object);
 		ret->object.type = blob_type;
+		ret->object.parsed = 1;
 		return ret;
 	}
 	if (obj->parsed && obj->type != blob_type) {


^ permalink raw reply

* [0/5] Updates for library, fsck-cache
From: Daniel Barkalow @ 2005-04-28  5:00 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

This patch series updates the parser library for some new things, adds a
function to parse an object of a type that's understood by the library but
not known in advance by the program, updates fsck-cache to use it, and
adds support for fsck-cache getting the root set out of .git/refs/.

 1: Consider a blob to be parsed
 2: Add tag header/parser to library
 3: Add function to parse an object of unspecified type.
 4: Rework fsck-cache to use parse_object().
 5: Make fsck-cache read heads out of .git/refs/*/*

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* [PATCH (take 2)] diff-cache.c compilation warning fix.
From: Junio C Hamano @ 2005-04-28  4:59 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git
In-Reply-To: <7vekcvbkq8.fsf@assigned-by-dhcp.cox.net>

Nobody uses return value from show_new_file() function but it is
defined as returning int and falls off at the end without
returning.  Make it void.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

This is take 2.  Earlier one was a botched patch that left
"return -1" in the function.

diff-cache.c |    4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

# - [PATCH] diff-tree -p implies diff-tree -p -r
# + 04/27 21:50 diff.c clean up temporary file.
--- k/diff-cache.c
+++ l/diff-cache.c
@@ -38,14 +38,14 @@ static int get_stat_data(struct cache_en
 	return 0;
 }

-static int show_new_file(struct cache_entry *new)
+static void show_new_file(struct cache_entry *new)
 {
 	unsigned char *sha1;
 	unsigned int mode;

 	/* New file in the index: it might actually be different in the working copy */
 	if (get_stat_data(new, &sha1, &mode) < 0)
-		return -1;
+		return;

 	show_file("+", new, sha1, mode);
 }

^ permalink raw reply

* [PATCH] diff.c: clean temporary files
From: Junio C Hamano @ 2005-04-28  4:51 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

When diff-cache -p and friends are interrupted, they can leave
their temporary files behind.  Also when the external diff
program is killed instead of exiting (this usually happens when
piping the output to a pager, which can cause SIGPIPE when the
user quits viewing the diff early), they incorrectly died
without cleaning their temporary file.  This patch fixes these
problems.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

diff.c |   29 ++++++++++++++++++++++++-----
1 files changed, 24 insertions(+), 5 deletions(-)

# - [PATCH] diff-tree -p implies diff-tree -p -r
# + 04/27 21:50 diff.c clean up temporary file.
--- k/diff.c
+++ l/diff.c
@@ -3,6 +3,7 @@
  */
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <signal.h>
 #include "cache.h"
 #include "diff.h"
 
@@ -119,6 +120,9 @@ static void prepare_temp_file(const char
 
 	if (!one->file_valid) {
 	not_a_valid_file:
+		/* A '-' entry produces this for file-2, and
+		 * a '+' entry produces this for file-1.
+		 */
 		temp->name = "/dev/null";
 		strcpy(temp->hex, ".");
 		strcpy(temp->mode, ".");
@@ -139,7 +143,7 @@ static void prepare_temp_file(const char
 				goto not_a_valid_file;
 			die("stat(%s): %s", temp->name, strerror(errno));
 		}
-		strcpy(temp->hex, ".");
+		strcpy(temp->hex, sha1_to_hex(null_sha1));
 		sprintf(temp->mode, "%06o",
 			S_IFREG |ce_permissions(st.st_mode));
 	}
@@ -180,6 +184,11 @@ static void remove_tempfile(void)
 		}
 }
 
+static void remove_tempfile_on_signal(int signo)
+{
+	remove_tempfile();
+}
+
 /* An external diff command takes:
  *
  * diff-cmd name infile1 infile1-sha1 infile1-mode \
@@ -191,7 +200,8 @@ void run_external_diff(const char *name,
 		       struct diff_spec *two)
 {
 	struct diff_tempfile *temp = diff_temp;
-	int pid, status;
+	pid_t pid;
+	int status;
 	static int atexit_asked = 0;
 
 	if (one && two) {
@@ -203,6 +213,7 @@ void run_external_diff(const char *name,
 			atexit_asked = 1;
 			atexit(remove_tempfile);
 		}
+		signal(SIGINT, remove_tempfile_on_signal);
 	}
 
 	fflush(NULL);
@@ -230,9 +241,17 @@ void run_external_diff(const char *name,
 			printf("* Unmerged path %s\n", name);
 		exit(0);
 	}
-	if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status))
-		die("diff program failed");
-
+	if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status)) {
+		/* We do not check the exit status because typically
+		 * diff exits non-zero if files are different, and
+		 * we are not interested in knowing that.  We *knew*
+		 * they are different and that's why we ran diff
+		 * in the first place!  However if it dies by a signal,
+		 * we stop processing immediately.
+		 */
+		remove_tempfile();
+		die("external diff died unexpectedly.\n");
+	}
 	remove_tempfile();
 }
 


^ permalink raw reply

* Re: : Networking
From: Daniel Barkalow @ 2005-04-28  4:43 UTC (permalink / raw)
  To: Ryan Anderson
  Cc: Bram Cohen, Linus Torvalds, Andrew Morton, pasky, davem, git
In-Reply-To: <20050428035534.GB30308@mythryan2.michonline.com>

On Wed, 27 Apr 2005, Ryan Anderson wrote:

> Now, all that history I had, with the duplicated implementation, and
> useless code is in my tree.
> 
> The current (as I understand it) policy is, "We don't want that
> history."  This means that the developer will build a new tree (maybe),
> export his patch and reimport it into a clean tree, making a much
> simpler history graph.

I've been doing just this. I actually import it in pieces, with a commit
between each, so it's just like I applied the patch series I'm about to
send out. It actually works beautifully, and, someday, I'll have the
series up on my site so that a maintainer can just pull it.

Honestly, I'm not interested long-term in my buggy history, even
locally; I'm interested in the clean history in which I make a series of
self-contained, logical modifications and they get merged upstream.

> What Andrew is doing isn't too far from this, in concept, it's just a
> lot more complicated because he's pulling something insane, like 27
> separate trees, plus several hundred stand alone patches.
> 
> So, there's a *deliberate* desire to drop history and move some content
> around outside of version control.

I think it's more a desire to drop history as it actually happened, and
replace it with history as it should have happened. The one thing I would
like is the ability to provide merging help to poor souls who got part of
the messy history without preserving that history. I think having the head
of the clean series have a bunch of lines: "replaces <sha1>", where people
aren't supposed to have or want that commit, but if they've merged it,
they should know that the clean series includes its content.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* Re: suffix array/tree deltas (Was: The criss-cross merge case)
From: Daniel Barkalow @ 2005-04-28  4:30 UTC (permalink / raw)
  To: Zed A. Shaw; +Cc: git
In-Reply-To: <1114659700.5910.10.camel@thamachine>

On Wed, 27 Apr 2005, Zed A. Shaw wrote:

> On Wed, 2005-04-27 at 19:32 -0400, Daniel Barkalow wrote:
> 
> > My plan is to implement multi-file diff and merge with a suffix tree-based
> > algorithm, and then revisit the history stuff once we have a merger that
> > can do sensible things with this information.
> 
> Hey, that's neat.  I've already implemented two versions of this very
> thing with FastCST.  The original used suffix trees, but I found that
> there were plenty of pathological cases which chewed memory and
> processor.  Most of these cases were large (>1MB) PDF files.  Don't ask
> me why PDF drove suffix tree algorithms insane, but they just did.

I'm not too surprised; but can you hope to merge or compare PDFs
anyway? I'd think that you'd just screw up alignment or something. (Note
that we aren't using deltas for history storage, so we're not interested
in the "compressing multiple versions" aspect of diffs.) I think I want to
punt anything too binary-like and try to find an unambiguous history-based
difference (i.e., there was some commit in the past that replaced one of
the versions with the other; therefore, we want the replacing one).

I'm thinking of line-based compressed suffix trees, with the obvious delta
algorithm: make the trees, find the longest prefix of the file, find the
longest prefix of the rest of the file, add an insertion for a line
that doesn't match, repeat. I probably need a few extra things to
stabilize the process (prefer that the next chunk come from the same file,
prefer that it come from next in the file, ignore copied lines without
enough content).

I haven't actually started yet; I'm waiting for a weekend when I'm feeling
inspired and not too fried.

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply

* Re: kernel.org now has gitweb installed
From: Daniel Jacobowitz @ 2005-04-28  4:17 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: Git Mailing List
In-Reply-To: <42703E79.8050808@zytor.com>

On Wed, Apr 27, 2005 at 06:38:01PM -0700, H. Peter Anvin wrote:
> http://www.kernel.org/git/

Thanks!  Now all I crave is a version which can browse the file tree
and file history; but I think we're almost ready for that...

-- 
Daniel Jacobowitz
CodeSourcery, LLC

^ permalink raw reply

* [PATCH] diff-cache.c compilation warning fix.
From: Junio C Hamano @ 2005-04-28  4:15 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: git

Nobody uses return value from show_new_file() function but it is
defined as returning int and falls off at the end without
returning.  Make it void.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---

diff-cache.c |    2 +-
1 files changed, 1 insertion(+), 1 deletion(-)

# - [PATCH] diff-tree -p implies diff-tree -p -r
# + working tree
--- k/diff-cache.c
+++ l/diff-cache.c
@@ -38,7 +38,7 @@ static int get_stat_data(struct cache_en
 	return 0;
 }

-static int show_new_file(struct cache_entry *new)
+static void show_new_file(struct cache_entry *new)
 {
 	unsigned char *sha1;
 	unsigned int mode;

^ permalink raw reply

* Re: : Networking
From: Ryan Anderson @ 2005-04-28  3:55 UTC (permalink / raw)
  To: Bram Cohen; +Cc: Linus Torvalds, Andrew Morton, pasky, davem, git
In-Reply-To: <Pine.LNX.4.44.0504261332540.4678-100000@wax.eds.org>

On Tue, Apr 26, 2005 at 01:35:55PM -0700, Bram Cohen wrote:
> Linus Torvalds wrote:
> 
> > On Tue, 26 Apr 2005, Andrew Morton wrote:
> > >
> > > With bk I was resolving that by just smashing the patches on top of each
> > > other, ignoring the rejects and refreshing the topmost patch.  That
> > > approach actually resolved this linus-vs-davem dupe as well.
> >
> > Oh, wow. I didn't realize that your scripts were quite _that_ stupid, and
> > didn't actually take advantage of any automatic merges at all.
> >
> > If so, git should trivially do everything that BK ever did for you. Which
> > is not saying a lot ;)
> 
> No version control system will do a particularly good job of merging
> content which got passed around outside of the system. They can be made to
> sort-of handle some simple cases well, but fundamentally too much
> information is getting dropped.

One thing to keep in mind, about the way Linux development works (and
honestly, the way I think "git" development is currently working) is
that one of the things the version control system has to provide an easy
method to do is to abandon history that is messy.

For example, I'm adding a new driver, "foobar".

It uses the fancy quantum-bus, which is fairly new to the kernel.
The bus driver is new, and I go off, work in my private tree for a
month, fixing all kinds of quantum-entanglement related bugs, and
committing as I go.

I get everything working, and submit my driver.

The quantum-bus maintainer replies and says, "Hey, I reworked the API so
that you don't need to worry about all this quantum-entanglement stuff
anymore, just call compensate_for_heisenberg() before doing the DMA."

I swear for a day or two, and rework my driver, and resubmit it.

Now, all that history I had, with the duplicated implementation, and
useless code is in my tree.

The current (as I understand it) policy is, "We don't want that
history."  This means that the developer will build a new tree (maybe),
export his patch and reimport it into a clean tree, making a much
simpler history graph.

What Andrew is doing isn't too far from this, in concept, it's just a
lot more complicated because he's pulling something insane, like 27
separate trees, plus several hundred stand alone patches.

So, there's a *deliberate* desire to drop history and move some content
around outside of version control.

Now, the desire to pull a bunch of separate trees, merge them, produce a
diff that roughly pertains to what came in from each tree, and collect
that as a patch series may be strange, but it seems to be working really
well at the moment, for Linux development.

> The solution is to get everyone using the same version control system,
> which is actually quite a workable solution if (a) the version control
> system in question is quite nice, and (b) there isn't some deep political
> reason why many people will never agree to use it.

Git (well, cogito, really) seems to be getting there awfully fast - I'm
rather impressed with the speed of it, and annoyed that I haven't had
the time to build up a test suite for merging!

-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply

* Re: Cogito nit: cg-update should default to "origin".
From: David A. Wheeler @ 2005-04-28  3:57 UTC (permalink / raw)
  To: Petr Baudis; +Cc: git
In-Reply-To: <20050428005337.GA3422@pasky.ji.cz>

I said:
>>Minor nit on Cogito: I think cg-update should default to "origin",
>>not the head, if you leave it unspecified. ... The origin seems (to me)
>>to be a MUCH more common situation (and thus the better default).

Petr Baudis replied:
> Actually, I wasn't too happy with the current update-to-HEAD special
> case...

Sounds like we're in agreement! Once the special case goes
away, cg-update in both concept & code essentially becomes:
  cg-pull ${1:-origin} && cg-merge
which has the wonderful advantage of being really, really
easy to explain.  ("cg-update ALWAYS pulls, then merges").

 > I think people do this cg-update without arguments so seldom
 > that changing this now shouldn't hurt much, right?

Absolutely!  Indeed, I find myself doing:
  cg-update {wait for something to happen} {oops} cg-update origin

> What about moving this special case
> to something like
> 	cg-restore
> and changing the defaulting of update and pull back to 'origin'?
...
> Another thing is to UI-wise maintain clear difference between cg-cancel
> and cg-restore. Do you think the names are distinctive enough? Any
> better naming ideas?

Good names for these operations seem to be tough to find.
"cg-cancel" seems odd anyway; you'd think you could
"cancel" a commit and then the commit would stop existing
(not true!).

I looked at a thesaurus; other options to cancel & restore
include: revert, recover, retrieve, reclaim, undo.
You could even use the names cg-recover-deleted to recover
deleted files (what cg-update does now without parameters),
and use cg-cancel-edits or cg-cancel-changes to make
clearer commands.  But in the end I have a different idea, hold on...

elsewhere Dan Holmsand said:
 >How about making the restore thing a special case of cg-cancel instead?
 >"Restore deleted files", and "restore deleted and modified files and
 >unseek" are similar enough that people will know where to look.
 >Something like "cg-cancel -C" (for careful), that only restores deleted
 >files would do it, I think.

There's a big risk of not including the "-C" and suddenly losing
everything.  Since there's NO way to recover these files,
a somewhat safer interface would probably be a better idea.
But merging the concepts may make sense if we can find a single
command name that would help people figure this out.

How about "cg-revert" or "cg-restore"?  The word "revert" is even
in the comments for cg-cancel, but now it makes sense to "revert"
or "restore" the existence of a file (whereas it's really odd
to "cancel" a file deletion).

A serious problem with cg-cancel (and previous cg-undo) is
big data loss, no recovery, of your recent work.... if it's
going to have less & more drastic operations, I'd sure hate
for the drastic operation to be the default.  There's also
missing functionality currently: often I want to revert to the
unedited state for just a single file, or just restore a single file.
So, how about this:

cg-revert [FILE...] or
cg-revert [-d|--deleted]|[-a|--all]
   Reverts some/all files back to the HEAD's state, eliminating changes

   If given a list of 1 or more files, this reverts just the named files
   to the HEAD state. If they were deleted, they are restored;
   if they were edited, their edits are PERMANENTLY LOST.
   If they haven't changed, nothing changes and there is no error.

   If given -d or --deleted, it reverts all deleted files.
   If given -a or --all, it reverts all files
   (everything), resulting in loss of all edits and removals.

How's that for a reasonable UI, replacing both cg-cancel
and cg-update's current no-parameter functionality?

--- David A. Wheeler

^ permalink raw reply

* suffix array/tree deltas (Was: The criss-cross merge case)
From: Zed A. Shaw @ 2005-04-28  3:41 UTC (permalink / raw)
  To: git
In-Reply-To: <Pine.LNX.4.21.0504271854240.30848-100000@iabervon.org>

On Wed, 2005-04-27 at 19:32 -0400, Daniel Barkalow wrote:
> On Wed, 27 Apr 2005, Bram Cohen wrote:
> 

> My plan is to implement multi-file diff and merge with a suffix tree-based
> algorithm, and then revisit the history stuff once we have a merger that
> can do sensible things with this information.

Hey, that's neat.  I've already implemented two versions of this very
thing with FastCST.  The original used suffix trees, but I found that
there were plenty of pathological cases which chewed memory and
processor.  Most of these cases were large (>1MB) PDF files.  Don't ask
me why PDF drove suffix tree algorithms insane, but they just did.

I recently switched to a suffix array based algorithm which actually
ends up being faster than the suffix tree alternative.  I'm not using
the most recent fastest algorithm and it still compares favorably with
xdelta.

There's tons of weird things about doing a delta based on suffix
arrays/trees, so feel free to pick my brain or the FastCST code if you
attempt it.  The difficult parts turn out to be making the suffix array
and searching for the matching/non-matching regions.  Once you do that
the actual delta algorithm is a simple while loop that keeps doing the
match/non-match detection.

Zed

^ permalink raw reply

* Re: A shortcoming of the git repo format
From: Ryan Anderson @ 2005-04-28  3:37 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Tom Lord, hpa, git
In-Reply-To: <Pine.LNX.4.58.0504271722260.18901@ppc970.osdl.org>

On Wed, Apr 27, 2005 at 05:57:07PM -0700, Linus Torvalds wrote:
> On Wed, 27 Apr 2005, Tom Lord wrote:
> 
> I'm not actually all that interested in SCM's. I'd have been much happier
> if I never had to start doing git in the first place. But circumstances
> not only forced me to do my own, it also so happens that I don't believe
> that there are many people around that have ever really _seen_ what my
> kind of development requirements are.

Oddly, I was trying to answer "Why distributed?" in a discussion on the
"Joel On Software" forum.

The particular thread I posted on, well, was kind of stupid, but in case
anyone is curious: http://discuss.joelonsoftware.com/default.asp?joel.3.115346.51

What I said might help give an overview of how Linux development works,
from my point of view.  I only occasionally poke at interesting things
on the periphery on whims, but I poke at the SCMy aspects of it, so
maybe it's relevant. 

 Here's an overview of how the distributed world of Linux works:

 1. Linus has his personal tree.  He pushes it out on a regular basis to
 rsync.kernel.org (well, kinda - that's where it ends up at).

 2. "Trusted lieutenants" have their own trees.  Some keep these on
 *.kernel.org, some don't.

 3. Lots of other people have personal trees.  These can be pretty much
 anywhere.

 These trees are in a variety of formats today, some are in "git", some
 are still in BitKeeper, some are from a tarball, some are tarball +
 patches, some are git + patches.

 There are a variety of merging methods:

 a.  Provide a publicly accessible repository.  (Formerly BK, now "git")
 that Linus, or a maintainer (i.e, "trusted lieutenant") can grab it
 from.  In the email where this location is given, the patch is usually
 included, at least in a summary format.

 b.  Provide a series of emails, with a description per email followed,
 inline, with a patch.

 These merging methods can be done directly with Linus, or with anyone
 else who is interested.  (Generally, merging with Linus is for arch and
 subsystem maintainers, or random small things that are either obviously
 correct, useful, or just don't fit elsewhere.)

 So, that's the merge process, for the most part.

 Now, most patches these days are going through Andrew Morton - even if
 he's not actually submitting them personally, he's probably putting
 them into his tree for testing purposes.  (Networking changes go direct
 to Linus, but Andrew keeps an up to date version of them in his -mm
 series of kernels.)

 If code isn't accepted, well, one of a couple things happens:
 1. The patch is silently ignored.  (This is less of a problem these
 days.)

 2. The patch is commented on and someone says, "No".  (Generally, this
 happens a few times for "new" code, as people try to get the concept to
 fit into the kernel in the cleanest way.  There are a lot of style nits
 at this point, but also discussions of "Is this the right way to do
 this?" and "Do we need a more general method to do this instead of this
 hack?")

 Verifying that testing has occurred is less important than you might
 think.  This is basically because small patches either come with a
 description of the bug they fix and an expert in that area will ACK the
 patch, they touch an area that few people use and so the submitter is
 probably the best qualified person to provide a patch and they'll only
 hurt themselves if they haven't tested it, or, via the history of your
 submissions to the kernel, you are known to not submit bad code, so
 there's an expectation of quality.

 Furthermore, an incredible amount of testing occurs in the major public
 trees (Linus/-mm) between a release, so most absolutely major bugs are
 spotted fairly quickly, and if the problem is systemic in a change,
 that change can be reverted until the code improves.

 On the topic of checking into private branches - it's not so much a
 matter of "the parent never sees the changes" as "the parent doesn't
 see them right now".

 FWIW, at my place of employment, we switched from CVS to BitKeeper last
 summer, and it is significantly more pleasant to work with, in all
 aspects.

 Currently our entire development staff is working from home.  This
 still works well, as we can all check in locally, and submit changes to
 the master repository when changes are ready.  Between having a partner
 company in Japan working on our code, and our development staff working
 from home offices, we would have a horrific time getting any
 centralized SCM product to perform well.  With purely local
 repositories, local branching, and submissions via email or ssh, the
 process still works well and is *fast*.  CVS over slow network links is
 certainly not *fast*, and I'd be very surprised if Perforce is
 significantly better in that regard.

 I'll just say this, in closing - working with a decentralized SCM tool
 changes the way you work.  There is a Linux Kernel developer that I am
 aware of that keeps 27 or so separate branches on his machine, so he
 can keep all the logically unrelated changes separate from each other.
 He builds kernels off an additional branch that merges all the others
 together, and submits changes to Linus via 2 or 3 "rollup" trees he
 maintains.

 You just don't work like that in a centralized SCM, because branching
 isn't painless, in the same way.

-- 

Ryan Anderson
  sometimes Pug Majere

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox