git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] add generic, type aware object chain walker
@ 2008-02-24 14:43 Martin Koegler
  2008-02-24 14:43 ` [PATCH 2/4] builtin-fsck: move away from object-refs Martin Koegler
                   ` (2 more replies)
  0 siblings, 3 replies; 19+ messages in thread
From: Martin Koegler @ 2008-02-24 14:43 UTC (permalink / raw)
  To: Junio C Hamano, Shawn O. Pearce; +Cc: git, Martin Koegler

The requirements are:
* it may not crash on NULL pointers
* a callback function is needed, as index-pack/unpack-objects
  need to do different things
* the type information is needed to check the expected <-> real type
  and print better error messages

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
 Makefile |    4 +-
 fsck.c   |   84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 fsck.h   |   10 +++++++
 3 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 fsck.c
 create mode 100644 fsck.h

diff --git a/Makefile b/Makefile
index a9b5a67..3b356f8 100644
--- a/Makefile
+++ b/Makefile
@@ -303,7 +303,7 @@ LIB_H = \
 	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
 	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
 	utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \
-	mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h ll-merge.h
+	mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h ll-merge.h fsck.h
 
 DIFF_OBJS = \
 	diff.o diff-lib.o diffcore-break.o diffcore-order.o \
@@ -327,7 +327,7 @@ LIB_OBJS = \
 	color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
 	convert.o attr.o decorate.o progress.o mailmap.o symlinks.o remote.o \
 	transport.o bundle.o walker.o parse-options.o ws.o archive.o branch.o \
-	ll-merge.o
+	ll-merge.o fsck.o
 
 BUILTIN_OBJS = \
 	builtin-add.o \
diff --git a/fsck.c b/fsck.c
new file mode 100644
index 0000000..92254fb
--- /dev/null
+++ b/fsck.c
@@ -0,0 +1,84 @@
+#include "cache.h"
+#include "object.h"
+#include "blob.h"
+#include "tree.h"
+#include "tree-walk.h"
+#include "commit.h"
+#include "tag.h"
+#include "fsck.h"
+
+static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+
+	if(parse_tree(tree))
+		return -1;
+
+	init_tree_desc(&desc, tree->buffer, tree->size);
+	while(tree_entry(&desc, &entry)) {
+		int result;
+		
+		if (S_ISGITLINK(entry.mode))
+			continue;
+		if (S_ISDIR(entry.mode))
+			result = walk(&lookup_tree(entry.sha1)->object, OBJ_TREE, data);
+		else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode))
+			result = walk(&lookup_blob(entry.sha1)->object, OBJ_BLOB, data);
+		else {
+			error("in tree %s: entry %s has bad mode %.6o\n",
+			      sha1_to_hex(tree->object.sha1), entry.path, entry.mode);
+			result = -1;
+		}
+		if (result)
+			return result;
+	}
+	return 0;
+}
+
+static int fsck_walk_commit(struct commit *commit, fsck_walk_func walk, void *data)
+{
+	struct commit_list *parents = commit->parents;
+	int result;
+
+	if(parse_commit(commit))
+		return -1;
+
+	result = walk((struct object*)commit->tree, OBJ_TREE, data);
+	if (result)
+		return result;
+
+	while (parents) {
+		result = walk((struct object*)parents->item, OBJ_COMMIT, data);
+		if (result)
+			return result;
+		parents = parents->next;
+	}
+	return 0;
+}
+
+static int fsck_walk_tag(struct tag *tag, fsck_walk_func walk, void *data)
+{
+	if(parse_tag(tag))
+		return -1;
+	return walk(tag->tagged, OBJ_ANY, data);
+}
+
+int fsck_walk(struct object *obj, fsck_walk_func walk, void *data)
+{
+	if (!obj)
+		return -1;
+	switch(obj->type) {
+	case OBJ_BLOB:
+		return 0;
+	case OBJ_TREE:
+		return fsck_walk_tree((struct tree*)obj, walk, data);
+	case OBJ_COMMIT:
+		return fsck_walk_commit((struct commit*)obj, walk, data);
+	case OBJ_TAG:
+		return fsck_walk_tag((struct tag*)obj, walk, data);
+	default:
+		error("Unknown object type for %s", sha1_to_hex(obj->sha1));
+		return -1;
+	}
+}
diff --git a/fsck.h b/fsck.h
new file mode 100644
index 0000000..fccc89f
--- /dev/null
+++ b/fsck.h
@@ -0,0 +1,10 @@
+#ifndef GIT_FSCK_H
+#define GIT_FSCK_H
+
+#define OBJ_ANY OBJ_BAD
+
+typedef int (*fsck_walk_func)(struct object *obj, int type, void *data);
+
+int fsck_walk(struct object *obj, fsck_walk_func walk, void *data);
+
+#endif
-- 
1.5.4.2.gf624.dirty

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/4] builtin-fsck: move away from object-refs
  2008-02-24 14:43 [PATCH 1/4] add generic, type aware object chain walker Martin Koegler
@ 2008-02-24 14:43 ` Martin Koegler
  2008-02-24 14:43   ` [PATCH 3/4] Remove unused object-ref code Martin Koegler
  2008-02-25  3:04 ` [PATCH 1/4] add generic, type aware object chain walker Shawn O. Pearce
  2008-02-25  3:08 ` Junio C Hamano
  2 siblings, 1 reply; 19+ messages in thread
From: Martin Koegler @ 2008-02-24 14:43 UTC (permalink / raw)
  To: Junio C Hamano, Shawn O. Pearce; +Cc: git, Martin Koegler

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
reachable.c is not the optimal thing for checking a repository.
Some problems are ignored by it while it bails out on other errors.

fsck should not bail out at the first error (well, there are some
die in tree-walk.c), so the fsck_walk callbacks return 0, even in
the case of an error. The error is propagated via errors_found.

The patch is slightly tested.

 builtin-fsck.c |   95 +++++++++++++++++++++++++++++++++++++++----------------
 1 files changed, 67 insertions(+), 28 deletions(-)

diff --git a/builtin-fsck.c b/builtin-fsck.c
index cc7524b..512346a 100644
--- a/builtin-fsck.c
+++ b/builtin-fsck.c
@@ -8,6 +8,7 @@
 #include "pack.h"
 #include "cache-tree.h"
 #include "tree-walk.h"
+#include "fsck.h"
 #include "parse-options.h"
 
 #define REACHABLE 0x0001
@@ -63,13 +64,70 @@ static int objwarning(struct object *obj, const char *err, ...)
 	return -1;
 }
 
+static int mark_object(struct object *obj, int type, void *data)
+{
+	struct tree *tree = NULL;
+	struct object *parent = data;
+
+	if (!obj) {
+		printf("broken link from %7s %s\n",
+			   typename(parent->type), sha1_to_hex(parent->sha1));
+		printf("broken link from %7s %s\n",
+			   (type==OBJ_ANY?"unknown":typename(type)), "unknown");
+		errors_found |= ERROR_REACHABLE;
+		return 0;
+	}
+
+	if (type != OBJ_ANY && obj->type != type) {
+		objerror(parent, "wrong object type in link");
+	}
+
+	if (obj->flags & REACHABLE)
+		return 0;
+	obj->flags |= REACHABLE;
+	if (!obj->parsed) {
+		if (parent && !has_sha1_file(obj->sha1)) {
+			printf("broken link from %7s %s\n",
+				 typename(parent->type), sha1_to_hex(parent->sha1));
+			printf("              to %7s %s\n",
+				 typename(obj->type), sha1_to_hex(obj->sha1));
+			errors_found |= ERROR_REACHABLE;
+		}
+		return 0;
+	}
+
+	if (obj->type == OBJ_TREE) {
+		obj->parsed = 0;
+		tree = (struct tree*)obj;
+		if (parse_tree(tree) < 0)
+			return 0; /* error already displayed */
+	}
+	fsck_walk(obj, mark_object, obj);
+	if (tree) {
+		free(tree->buffer);
+		tree->buffer=NULL;
+	}
+	return 0;
+}
+
+static void mark_object_reachable(struct object *obj)
+{
+	mark_object(obj, OBJ_ANY, 0);
+}
+
+static int mark_used(struct object *obj, int type, void *data)
+{
+	if (!obj)
+		return 0;
+	obj->used = 1;
+	return 0;
+}
+
 /*
  * Check a single reachable object
  */
 static void check_reachable_object(struct object *obj)
 {
-	const struct object_refs *refs;
-
 	/*
 	 * We obviously want the object to be parsed,
 	 * except if it was in a pack-file and we didn't
@@ -82,25 +140,6 @@ static void check_reachable_object(struct object *obj)
 		errors_found |= ERROR_REACHABLE;
 		return;
 	}
-
-	/*
-	 * Check that everything that we try to reference is also good.
-	 */
-	refs = lookup_object_refs(obj);
-	if (refs) {
-		unsigned j;
-		for (j = 0; j < refs->count; j++) {
-			struct object *ref = refs->ref[j];
-			if (ref->parsed ||
-			    (has_sha1_file(ref->sha1)))
-				continue;
-			printf("broken link from %7s %s\n",
-			       typename(obj->type), sha1_to_hex(obj->sha1));
-			printf("              to %7s %s\n",
-			       typename(ref->type), sha1_to_hex(ref->sha1));
-			errors_found |= ERROR_REACHABLE;
-		}
-	}
 }
 
 /*
@@ -414,6 +453,7 @@ static int fsck_sha1(const unsigned char *sha1)
 	if (obj->flags & SEEN)
 		return 0;
 	obj->flags |= SEEN;
+	fsck_walk (obj, mark_used, 0);
 	if (obj->type == OBJ_BLOB)
 		return 0;
 	if (obj->type == OBJ_TREE)
@@ -538,13 +578,13 @@ static int fsck_handle_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 		obj = lookup_object(osha1);
 		if (obj) {
 			obj->used = 1;
-			mark_reachable(obj, REACHABLE);
+			mark_object_reachable(obj);
 		}
 	}
 	obj = lookup_object(nsha1);
 	if (obj) {
 		obj->used = 1;
-		mark_reachable(obj, REACHABLE);
+		mark_object_reachable(obj);
 	}
 	return 0;
 }
@@ -574,7 +614,7 @@ static int fsck_handle_ref(const char *refname, const unsigned char *sha1, int f
 		error("%s: not a commit", refname);
 	default_refs++;
 	obj->used = 1;
-	mark_reachable(obj, REACHABLE);
+	mark_object_reachable(obj);
 
 	return 0;
 }
@@ -660,7 +700,7 @@ static int fsck_cache_tree(struct cache_tree *it)
 			      sha1_to_hex(it->sha1));
 			return 1;
 		}
-		mark_reachable(obj, REACHABLE);
+		mark_object_reachable(obj);
 		obj->used = 1;
 		if (obj->type != OBJ_TREE)
 			err |= objerror(obj, "non-tree in cache-tree");
@@ -693,7 +733,6 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
 {
 	int i, heads;
 
-	track_object_refs = 1;
 	errors_found = 0;
 
 	argc = parse_options(argc, argv, fsck_opts, fsck_usage, 0);
@@ -741,7 +780,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
 				continue;
 
 			obj->used = 1;
-			mark_reachable(obj, REACHABLE);
+			mark_object_reachable(obj);
 			heads++;
 			continue;
 		}
@@ -773,7 +812,7 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
 				continue;
 			obj = &blob->object;
 			obj->used = 1;
-			mark_reachable(obj, REACHABLE);
+			mark_object_reachable(obj);
 		}
 		if (active_cache_tree)
 			fsck_cache_tree(active_cache_tree);
-- 
1.5.4.2.gf624.dirty

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 3/4] Remove unused object-ref code
  2008-02-24 14:43 ` [PATCH 2/4] builtin-fsck: move away from object-refs Martin Koegler
@ 2008-02-24 14:43   ` Martin Koegler
  2008-02-24 14:43     ` [PATCH 4/4] builtin-fsck: reports missing parent commits Martin Koegler
  0 siblings, 1 reply; 19+ messages in thread
From: Martin Koegler @ 2008-02-24 14:43 UTC (permalink / raw)
  To: Junio C Hamano, Shawn O. Pearce; +Cc: git, Martin Koegler

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
 Makefile               |    2 +-
 builtin-fetch-pack.c   |    1 -
 builtin-pack-objects.c |    1 -
 builtin-rev-list.c     |    1 -
 commit.c               |   11 ------
 object-refs.c          |   87 ------------------------------------------------
 object.h               |    8 ----
 tag.c                  |    6 ---
 tree.c                 |   48 --------------------------
 upload-pack.c          |    1 -
 walker.c               |    1 -
 11 files changed, 1 insertions(+), 166 deletions(-)
 delete mode 100644 object-refs.c

diff --git a/Makefile b/Makefile
index 3b356f8..a184f6e 100644
--- a/Makefile
+++ b/Makefile
@@ -318,7 +318,7 @@ LIB_OBJS = \
 	patch-ids.o \
 	object.o pack-check.o pack-write.o patch-delta.o path.o pkt-line.o \
 	sideband.o reachable.o reflog-walk.o \
-	quote.o read-cache.o refs.o run-command.o dir.o object-refs.o \
+	quote.o read-cache.o refs.o run-command.o dir.o \
 	server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \
 	tag.o tree.o usage.o config.o environment.o ctype.o copy.o \
 	revision.o pager.o tree-walk.o xdiff-interface.o \
diff --git a/builtin-fetch-pack.c b/builtin-fetch-pack.c
index f401352..25f1915 100644
--- a/builtin-fetch-pack.c
+++ b/builtin-fetch-pack.c
@@ -386,7 +386,6 @@ static int everything_local(struct ref **refs, int nr_match, char **match)
 	int retval;
 	unsigned long cutoff = 0;
 
-	track_object_refs = 0;
 	save_commit_buffer = 0;
 
 	for (ref = *refs; ref; ref = ref->next) {
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 7dff653..d9f7db8 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -2012,7 +2012,6 @@ static void get_object_list(int ac, const char **av)
 
 	init_revisions(&revs, NULL);
 	save_commit_buffer = 0;
-	track_object_refs = 0;
 	setup_revisions(ac, av, &revs, NULL);
 
 	while (fgets(line, sizeof(line), stdin) != NULL) {
diff --git a/builtin-rev-list.c b/builtin-rev-list.c
index 6f7d5f8..921113f 100644
--- a/builtin-rev-list.c
+++ b/builtin-rev-list.c
@@ -607,7 +607,6 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
 		usage(rev_list_usage);
 
 	save_commit_buffer = revs.verbose_header || revs.grep_filter;
-	track_object_refs = 0;
 	if (bisect_list)
 		revs.limited = 1;
 
diff --git a/commit.c b/commit.c
index 22ce776..6684c4e 100644
--- a/commit.c
+++ b/commit.c
@@ -290,17 +290,6 @@ int parse_commit_buffer(struct commit *item, void *buffer, unsigned long size)
 	}
 	item->date = parse_commit_date(bufptr, tail);
 
-	if (track_object_refs) {
-		unsigned i = 0;
-		struct commit_list *p;
-		struct object_refs *refs = alloc_object_refs(n_refs);
-		if (item->tree)
-			refs->ref[i++] = &item->tree->object;
-		for (p = item->parents; p; p = p->next)
-			refs->ref[i++] = &p->item->object;
-		set_object_refs(&item->object, refs);
-	}
-
 	return 0;
 }
 
diff --git a/object-refs.c b/object-refs.c
deleted file mode 100644
index 5345671..0000000
--- a/object-refs.c
+++ /dev/null
@@ -1,87 +0,0 @@
-#include "cache.h"
-#include "object.h"
-#include "decorate.h"
-
-int track_object_refs = 0;
-
-static struct decoration ref_decorate;
-
-struct object_refs *lookup_object_refs(struct object *base)
-{
-	return lookup_decoration(&ref_decorate, base);
-}
-
-static void add_object_refs(struct object *obj, struct object_refs *refs)
-{
-	if (add_decoration(&ref_decorate, obj, refs))
-		die("object %s tried to add refs twice!", sha1_to_hex(obj->sha1));
-}
-
-struct object_refs *alloc_object_refs(unsigned count)
-{
-	struct object_refs *refs;
-	size_t size = sizeof(*refs) + count*sizeof(struct object *);
-
-	refs = xcalloc(1, size);
-	refs->count = count;
-	return refs;
-}
-
-static int compare_object_pointers(const void *a, const void *b)
-{
-	const struct object * const *pa = a;
-	const struct object * const *pb = b;
-	if (*pa == *pb)
-		return 0;
-	else if (*pa < *pb)
-		return -1;
-	else
-		return 1;
-}
-
-void set_object_refs(struct object *obj, struct object_refs *refs)
-{
-	unsigned int i, j;
-
-	/* Do not install empty list of references */
-	if (refs->count < 1) {
-		free(refs);
-		return;
-	}
-
-	/* Sort the list and filter out duplicates */
-	qsort(refs->ref, refs->count, sizeof(refs->ref[0]),
-	      compare_object_pointers);
-	for (i = j = 1; i < refs->count; i++) {
-		if (refs->ref[i] != refs->ref[i - 1])
-			refs->ref[j++] = refs->ref[i];
-	}
-	if (j < refs->count) {
-		/* Duplicates were found - reallocate list */
-		size_t size = sizeof(*refs) + j*sizeof(struct object *);
-		refs->count = j;
-		refs = xrealloc(refs, size);
-	}
-
-	for (i = 0; i < refs->count; i++)
-		refs->ref[i]->used = 1;
-	add_object_refs(obj, refs);
-}
-
-void mark_reachable(struct object *obj, unsigned int mask)
-{
-	const struct object_refs *refs;
-
-	if (!track_object_refs)
-		die("cannot do reachability with object refs turned off");
-	/* If we've been here already, don't bother */
-	if (obj->flags & mask)
-		return;
-	obj->flags |= mask;
-	refs = lookup_object_refs(obj);
-	if (refs) {
-		unsigned i;
-		for (i = 0; i < refs->count; i++)
-			mark_reachable(refs->ref[i], mask);
-	}
-}
diff --git a/object.h b/object.h
index 397bbfa..036bd66 100644
--- a/object.h
+++ b/object.h
@@ -35,14 +35,11 @@ struct object {
 	unsigned char sha1[20];
 };
 
-extern int track_object_refs;
-
 extern const char *typename(unsigned int type);
 extern int type_from_string(const char *str);
 
 extern unsigned int get_max_object_index(void);
 extern struct object *get_indexed_object(unsigned int);
-extern struct object_refs *lookup_object_refs(struct object *);
 
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
@@ -61,11 +58,6 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t
 /** Returns the object, with potentially excess memory allocated. **/
 struct object *lookup_unknown_object(const unsigned  char *sha1);
 
-struct object_refs *alloc_object_refs(unsigned count);
-void set_object_refs(struct object *obj, struct object_refs *refs);
-
-void mark_reachable(struct object *obj, unsigned int mask);
-
 struct object_list *object_list_insert(struct object *item,
 				       struct object_list **list_p);
 
diff --git a/tag.c b/tag.c
index 990134f..4470d2b 100644
--- a/tag.c
+++ b/tag.c
@@ -87,12 +87,6 @@ int parse_tag_buffer(struct tag *item, void *data, unsigned long size)
 		item->tagged = NULL;
 	}
 
-	if (item->tagged && track_object_refs) {
-		struct object_refs *refs = alloc_object_refs(1);
-		refs->ref[0] = item->tagged;
-		set_object_refs(&item->object, refs);
-	}
-
 	return 0;
 }
 
diff --git a/tree.c b/tree.c
index 87708ef..4b1825c 100644
--- a/tree.c
+++ b/tree.c
@@ -202,52 +202,6 @@ struct tree *lookup_tree(const unsigned char *sha1)
 	return (struct tree *) obj;
 }
 
-/*
- * NOTE! Tree refs to external git repositories
- * (ie gitlinks) do not count as real references.
- *
- * You don't have to have those repositories
- * available at all, much less have the objects
- * accessible from the current repository.
- */
-static void track_tree_refs(struct tree *item)
-{
-	int n_refs = 0, i;
-	struct object_refs *refs;
-	struct tree_desc desc;
-	struct name_entry entry;
-
-	/* Count how many entries there are.. */
-	init_tree_desc(&desc, item->buffer, item->size);
-	while (tree_entry(&desc, &entry)) {
-		if (S_ISGITLINK(entry.mode))
-			continue;
-		n_refs++;
-	}
-
-	/* Allocate object refs and walk it again.. */
-	i = 0;
-	refs = alloc_object_refs(n_refs);
-	init_tree_desc(&desc, item->buffer, item->size);
-	while (tree_entry(&desc, &entry)) {
-		struct object *obj;
-
-		if (S_ISGITLINK(entry.mode))
-			continue;
-		if (S_ISDIR(entry.mode))
-			obj = &lookup_tree(entry.sha1)->object;
-		else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode))
-			obj = &lookup_blob(entry.sha1)->object;
-		else {
-			warning("in tree %s: entry %s has bad mode %.6o\n",
-			     sha1_to_hex(item->object.sha1), entry.path, entry.mode);
-			obj = lookup_unknown_object(entry.sha1);
-		}
-		refs->ref[i++] = obj;
-	}
-	set_object_refs(&item->object, refs);
-}
-
 int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 {
 	if (item->object.parsed)
@@ -256,8 +210,6 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 	item->buffer = buffer;
 	item->size = size;
 
-	if (track_object_refs)
-		track_tree_refs(item);
 	return 0;
 }
 
diff --git a/upload-pack.c b/upload-pack.c
index b26d053..e5421db 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -393,7 +393,6 @@ static int get_common_commits(void)
 	char hex[41], last_hex[41];
 	int len;
 
-	track_object_refs = 0;
 	save_commit_buffer = 0;
 
 	for(;;) {
diff --git a/walker.c b/walker.c
index adc3e80..c10eca8 100644
--- a/walker.c
+++ b/walker.c
@@ -256,7 +256,6 @@ int walker_fetch(struct walker *walker, int targets, char **target,
 	int i;
 
 	save_commit_buffer = 0;
-	track_object_refs = 0;
 
 	for (i = 0; i < targets; i++) {
 		if (!write_ref || !write_ref[i])
-- 
1.5.4.2.gf624.dirty

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 4/4] builtin-fsck: reports missing parent commits
  2008-02-24 14:43   ` [PATCH 3/4] Remove unused object-ref code Martin Koegler
@ 2008-02-24 14:43     ` Martin Koegler
  0 siblings, 0 replies; 19+ messages in thread
From: Martin Koegler @ 2008-02-24 14:43 UTC (permalink / raw)
  To: Junio C Hamano, Shawn O. Pearce; +Cc: git, Martin Koegler

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
---
 builtin-fsck.c |   24 ++++++++++++++++++++++++
 commit.c       |    2 +-
 commit.h       |    1 +
 3 files changed, 26 insertions(+), 1 deletions(-)

diff --git a/builtin-fsck.c b/builtin-fsck.c
index 512346a..18a660f 100644
--- a/builtin-fsck.c
+++ b/builtin-fsck.c
@@ -394,6 +394,8 @@ static int fsck_commit(struct commit *commit)
 {
 	char *buffer = commit->buffer;
 	unsigned char tree_sha1[20], sha1[20];
+	struct commit_graft *graft;
+	int parents = 0;
 
 	if (verbose)
 		fprintf(stderr, "Checking commit %s\n",
@@ -411,6 +413,28 @@ static int fsck_commit(struct commit *commit)
 		if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
 			return objerror(&commit->object, "invalid 'parent' line format - bad sha1");
 		buffer += 48;
+		parents++;
+	}
+	graft = lookup_commit_graft(commit->object.sha1);
+	if (graft) {
+		struct commit_list *p = commit->parents;
+		parents = 0;
+		while (p && parents) {
+			p = p->next;
+			parents -- ;
+		}
+		if (graft->nr_parent == -1 && !parents)
+			; /* shallow commit */
+		else if (graft->nr_parent != parents)
+			return objerror(&commit->object, "graft objects missing");
+	} else {
+		struct commit_list *p = commit->parents;
+		while (p && parents) {
+			p = p->next;
+			parents -- ;
+		}
+		if (p || parents)
+			return objerror(&commit->object, "parent objects missing");
 	}
 	if (memcmp(buffer, "author ", 7))
 		return objerror(&commit->object, "invalid format - expected 'author' line");
diff --git a/commit.c b/commit.c
index 6684c4e..94d5b3d 100644
--- a/commit.c
+++ b/commit.c
@@ -193,7 +193,7 @@ static void prepare_commit_graft(void)
 	commit_graft_prepared = 1;
 }
 
-static struct commit_graft *lookup_commit_graft(const unsigned char *sha1)
+struct commit_graft *lookup_commit_graft(const unsigned char *sha1)
 {
 	int pos;
 	prepare_commit_graft();
diff --git a/commit.h b/commit.h
index 80d65b9..a1e9591 100644
--- a/commit.h
+++ b/commit.h
@@ -116,6 +116,7 @@ struct commit_graft {
 struct commit_graft *read_graft_line(char *buf, int len);
 int register_commit_graft(struct commit_graft *, int);
 int read_graft_file(const char *graft_file);
+struct commit_graft *lookup_commit_graft(const unsigned char *sha1);
 
 extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2, int cleanup);
 
-- 
1.5.4.2.gf624.dirty

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-24 14:43 [PATCH 1/4] add generic, type aware object chain walker Martin Koegler
  2008-02-24 14:43 ` [PATCH 2/4] builtin-fsck: move away from object-refs Martin Koegler
@ 2008-02-25  3:04 ` Shawn O. Pearce
  2008-02-25  7:26   ` Martin Koegler
  2008-02-25  3:08 ` Junio C Hamano
  2 siblings, 1 reply; 19+ messages in thread
From: Shawn O. Pearce @ 2008-02-25  3:04 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Junio C Hamano, git

Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
> +static int fsck_walk_commit(struct commit *commit, fsck_walk_func walk, void *data)
> +{
> +	struct commit_list *parents = commit->parents;
> +	int result;
> +
> +	if(parse_commit(commit))
> +		return -1;

Hmm.  Don't you need to get commit->parenst *after* it is parsed,
and not before?

> @@ -0,0 +1,10 @@
> +#ifndef GIT_FSCK_H
> +#define GIT_FSCK_H
> +
> +#define OBJ_ANY OBJ_BAD

Its unclear why this macro is necessary.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-24 14:43 [PATCH 1/4] add generic, type aware object chain walker Martin Koegler
  2008-02-24 14:43 ` [PATCH 2/4] builtin-fsck: move away from object-refs Martin Koegler
  2008-02-25  3:04 ` [PATCH 1/4] add generic, type aware object chain walker Shawn O. Pearce
@ 2008-02-25  3:08 ` Junio C Hamano
  2008-02-25  7:46   ` Martin Koegler
  2 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2008-02-25  3:08 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Shawn O. Pearce, git

Martin Koegler <mkoegler@auto.tuwien.ac.at> writes:

> diff --git a/Makefile b/Makefile
> index a9b5a67..3b356f8 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -303,7 +303,7 @@ LIB_H = \
>  	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
>  	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
>  	utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \
> -	mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h ll-merge.h
> +	mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h ll-merge.h fsck.h

I'd rather see a series does not depend on things in next that
you do not have to depend on, pretty please?

The patches have style issues everywhere.  The opening paren
that surrounds the conditional to if/while should always have a
SP before it, while function names at the function callsite
should immediately be followed by an open paren.

> +static int fsck_walk_tree(struct tree *tree, fsck_walk_func walk, void *data)
> +{
> +	struct tree_desc desc;
> +	struct name_entry entry;
> +
> +	if(parse_tree(tree))
> +		return -1;
> +
> +	init_tree_desc(&desc, tree->buffer, tree->size);
> +	while(tree_entry(&desc, &entry)) {
> +		int result;
> +		
> +		if (S_ISGITLINK(entry.mode))
> +			continue;
> +		if (S_ISDIR(entry.mode))
> +			result = walk(&lookup_tree(entry.sha1)->object, OBJ_TREE, data);
> +		else if (S_ISREG(entry.mode) || S_ISLNK(entry.mode))
> +			result = walk(&lookup_blob(entry.sha1)->object, OBJ_BLOB, data);
> +		else {
> +			error("in tree %s: entry %s has bad mode %.6o\n",
> +			      sha1_to_hex(tree->object.sha1), entry.path, entry.mode);
> +			result = -1;
> +		}

Would "result = error(...)" be too cute ;-)?

> +static int fsck_walk_commit(struct commit *commit, fsck_walk_func walk, void *data)
> +{
> +	struct commit_list *parents = commit->parents;
> +	int result;
> +
> +	if(parse_commit(commit))
> +		return -1;
> +
> +	result = walk((struct object*)commit->tree, OBJ_TREE, data);
> +	if (result)
> +		return result;
> +
> +	while (parents) {
> +		result = walk((struct object*)parents->item, OBJ_COMMIT, data);
> +		if (result)
> +			return result;
> +		parents = parents->next;
> +	}
> +	return 0;
> +}

Hmm.  For the purpose of proving there is _no_ error (or an
error or more), it would be Ok to return early like this, but
won't there be cases where you would want to get as many
coverage as possible?

For example, I do not think you can use this to mark reachable
objects.  Even if you find error walking the first parent
history, you would want to still mark a healthy second parent
history reachable.

> diff --git a/fsck.h b/fsck.h
> new file mode 100644
> index 0000000..fccc89f
> --- /dev/null
> +++ b/fsck.h
> @@ -0,0 +1,10 @@
> +#ifndef GIT_FSCK_H
> +#define GIT_FSCK_H
> +
> +#define OBJ_ANY OBJ_BAD
> +
> +typedef int (*fsck_walk_func)(struct object *obj, int type, void *data);

It is unclear in this patch, but this "type" is not telling what
the type of the object is to the walk_func, but is telling it
that the given object was found in a place where object of that
type is expected/required, so that walk_func can check and
complain, right?  Needs a bit of commenting...

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  3:04 ` [PATCH 1/4] add generic, type aware object chain walker Shawn O. Pearce
@ 2008-02-25  7:26   ` Martin Koegler
  2008-02-25  7:37     ` Junio C Hamano
  0 siblings, 1 reply; 19+ messages in thread
From: Martin Koegler @ 2008-02-25  7:26 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Junio C Hamano, git

On Sun, Feb 24, 2008 at 10:04:04PM -0500, Shawn O. Pearce wrote:
> Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
> Hmm.  Don't you need to get commit->parenst *after* it is parsed,
> and not before?

Thanks, fixed.

> > @@ -0,0 +1,10 @@
> > +#ifndef GIT_FSCK_H
> > +#define GIT_FSCK_H
> > +
> > +#define OBJ_ANY OBJ_BAD
> 
> Its unclear why this macro is necessary.

There are cases (eg. a tag->tagged), where the object type does not
matter. In this case, the callback gets this value for the expected
type.

The value of OBJ_BAD (-1) is not used anywhere in git, so its
free. OBJ_ANY is just a better name.

If somebody has a better idea, please tell me.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:26   ` Martin Koegler
@ 2008-02-25  7:37     ` Junio C Hamano
  2008-02-25  7:52       ` Martin Koegler
  0 siblings, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2008-02-25  7:37 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Shawn O. Pearce, git

mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:

> On Sun, Feb 24, 2008 at 10:04:04PM -0500, Shawn O. Pearce wrote:
>> Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
>> Hmm.  Don't you need to get commit->parenst *after* it is parsed,
>> and not before?
>
> Thanks, fixed.
>
>> > @@ -0,0 +1,10 @@
>> > +#ifndef GIT_FSCK_H
>> > +#define GIT_FSCK_H
>> > +
>> > +#define OBJ_ANY OBJ_BAD
>> 
>> Its unclear why this macro is necessary.
>
> There are cases (eg. a tag->tagged), where the object type does not
> matter. In this case, the callback gets this value for the expected
> type.
>
> The value of OBJ_BAD (-1) is not used anywhere in git, so its
> free. OBJ_ANY is just a better name.
>
> If somebody has a better idea, please tell me.

As I mentioned in my previous review comment, the token is used
to tell the walker callback function that this type of object
was expected where it was found, in order to allow the callback
to check the type of the object.  I think OBJ_ANY is a very good
name for the token you would use to tell the callback that any
type of object can appear (because that is a taggee).

So I do not have objection to OBJ_ANY (but again, this kind of
thing needs to be explained in your commit log message), but
giving the token the same value as OBJ_BAD may not be such a
cool idea.  After all, if the walker callback was told with
OBJ_ANY that any type of object is Ok, it should still say
"oops" if the given object said it actually is of type OBJ_BAD.
E.g. in your [2/4] patch:

        +static int mark_object(struct object *obj, int type, void *data)
        +{
        + ...
        +	if (type != OBJ_ANY && obj->type != type) {
        +		objerror(parent, "wrong object type in link");
        +	}

if you use the above #define, a tagged object that has a bad
type will pass this check unnoticed, won't it?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  3:08 ` Junio C Hamano
@ 2008-02-25  7:46   ` Martin Koegler
  2008-02-25  7:59     ` Junio C Hamano
  2008-02-25  8:10     ` Shawn O. Pearce
  0 siblings, 2 replies; 19+ messages in thread
From: Martin Koegler @ 2008-02-25  7:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn O. Pearce, git

On Sun, Feb 24, 2008 at 07:08:39PM -0800, Junio C Hamano wrote:
> > diff --git a/Makefile b/Makefile
> > index a9b5a67..3b356f8 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -303,7 +303,7 @@ LIB_H = \
> >  	run-command.h strbuf.h tag.h tree.h git-compat-util.h revision.h \
> >  	tree-walk.h log-tree.h dir.h path-list.h unpack-trees.h builtin.h \
> >  	utf8.h reflog-walk.h patch-ids.h attr.h decorate.h progress.h \
> > -	mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h ll-merge.h
> > +	mailmap.h remote.h parse-options.h transport.h diffcore.h hash.h ll-merge.h fsck.h
> 
> I'd rather see a series does not depend on things in next that
> you do not have to depend on, pretty please?

I usually develop my patch on next. I can offer you two things:
* base my patches on something different (master?)
* add fsck.h/o some lines above

What do you prefer?

> > +static int fsck_walk_commit(struct commit *commit, fsck_walk_func walk, void *data)
> > +{
> > +	struct commit_list *parents = commit->parents;
> > +	int result;
> > +
> > +	if(parse_commit(commit))
> > +		return -1;
> > +
> > +	result = walk((struct object*)commit->tree, OBJ_TREE, data);
> > +	if (result)
> > +		return result;
> > +
> > +	while (parents) {
> > +		result = walk((struct object*)parents->item, OBJ_COMMIT, data);
> > +		if (result)
> > +			return result;
> > +		parents = parents->next;
> > +	}
> > +	return 0;
> > +}
> 
> Hmm.  For the purpose of proving there is _no_ error (or an
> error or more), it would be Ok to return early like this, but
> won't there be cases where you would want to get as many
> coverage as possible?
> 
> For example, I do not think you can use this to mark reachable
> objects.  Even if you find error walking the first parent
> history, you would want to still mark a healthy second parent
> history reachable.

How should I define the return value of fsck_walk in the presence of
multiple errors?

It would not be necessary for all my users:

* in unpack-object and index-pack (I'll send an updated patch in the
  next days), any error means that we can abort. Further checking would
  mean wasting of resources.

* in fsck (patch 2) the error is signaled by the errors_found variable, so
  all callbacks can return 0, even in the case of an error. Checking the
  return value of fsck_walk would mean duplicate error messages.

mfg Martin Kögler

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:37     ` Junio C Hamano
@ 2008-02-25  7:52       ` Martin Koegler
  2008-02-25  8:02         ` Junio C Hamano
  2008-02-25  8:04         ` Shawn O. Pearce
  0 siblings, 2 replies; 19+ messages in thread
From: Martin Koegler @ 2008-02-25  7:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn O. Pearce, git

On Sun, Feb 24, 2008 at 11:37:25PM -0800, Junio C Hamano wrote:
> mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:
> So I do not have objection to OBJ_ANY (but again, this kind of
> thing needs to be explained in your commit log message), but
> giving the token the same value as OBJ_BAD may not be such a
> cool idea.  

What about
#define OBJ_BAD -2

> After all, if the walker callback was told with
> OBJ_ANY that any type of object is Ok, it should still say
> "oops" if the given object said it actually is of type OBJ_BAD.
> E.g. in your [2/4] patch:
> 
>         +static int mark_object(struct object *obj, int type, void *data)
>         +{
>         + ...
>         +	if (type != OBJ_ANY && obj->type != type) {
>         +		objerror(parent, "wrong object type in link");
>         +	}
> 
> if you use the above #define, a tagged object that has a bad
> type will pass this check unnoticed, won't it?

No, it wouldn't, as object->type is never initialized to OBJ_BAD:
$ grep "OBJ_BAD" *.c *.h
cache.h:        OBJ_BAD = -1,

mfg Martin Kögler

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:46   ` Martin Koegler
@ 2008-02-25  7:59     ` Junio C Hamano
  2008-02-25  8:21       ` Martin Koegler
  2008-02-25  8:10     ` Shawn O. Pearce
  1 sibling, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2008-02-25  7:59 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Shawn O. Pearce, git

mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:

> On Sun, Feb 24, 2008 at 07:08:39PM -0800, Junio C Hamano wrote:
> ...
>> > +	while (parents) {
>> > +		result = walk((struct object*)parents->item, OBJ_COMMIT, data);
>> > +		if (result)
>> > +			return result;
>> > +		parents = parents->next;
>> > +	}
>> > +	return 0;
>> > +}
>> 
>> Hmm.  For the purpose of proving there is _no_ error (or an
>> error or more), it would be Ok to return early like this, but
>> won't there be cases where you would want to get as many
>> coverage as possible?
>> 
>> For example, I do not think you can use this to mark reachable
>> objects.  Even if you find error walking the first parent
>> history, you would want to still mark a healthy second parent
>> history reachable.
>
> How should I define the return value of fsck_walk in the presence of
> multiple errors?

Returning error is fine.  That is not what I was talking about.

I was talking about an early return in the code, that does not
callback once you find an error.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:52       ` Martin Koegler
@ 2008-02-25  8:02         ` Junio C Hamano
  2008-02-25  8:06           ` Martin Koegler
  2008-02-25  8:04         ` Shawn O. Pearce
  1 sibling, 1 reply; 19+ messages in thread
From: Junio C Hamano @ 2008-02-25  8:02 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Shawn O. Pearce, git

mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:

> What about
> #define OBJ_BAD -2

You mean "#define OBJ_ANY -2"?

>> After all, if the walker callback was told with
>> OBJ_ANY that any type of object is Ok, it should still say
>> "oops" if the given object said it actually is of type OBJ_BAD.
>> E.g. in your [2/4] patch:
>> 
>>         +static int mark_object(struct object *obj, int type, void *data)
>>         +{
>>         + ...
>>         +	if (type != OBJ_ANY && obj->type != type) {
>>         +		objerror(parent, "wrong object type in link");
>>         +	}
>> 
>> if you use the above #define, a tagged object that has a bad
>> type will pass this check unnoticed, won't it?
>
> No, it wouldn't, as object->type is never initialized to OBJ_BAD:
> $ grep "OBJ_BAD" *.c *.h
> cache.h:        OBJ_BAD = -1,

Don't you think that is too subtle?  Don't you want to future
proof your code?

Using the OBJ_ANY (which I'd agree is a good name) that is
different from anything else that is already defined would be a
good idea for that, I would think.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:52       ` Martin Koegler
  2008-02-25  8:02         ` Junio C Hamano
@ 2008-02-25  8:04         ` Shawn O. Pearce
  2008-02-25 17:49           ` Nicolas Pitre
  1 sibling, 1 reply; 19+ messages in thread
From: Shawn O. Pearce @ 2008-02-25  8:04 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Junio C Hamano, git

Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
> 
> What about
> #define OBJ_BAD -2

All of the other OBJ_* constants are declared in the enum
object_type, not as #defines.

See below for why this should not be <0.
 
> > After all, if the walker callback was told with
> > OBJ_ANY that any type of object is Ok, it should still say
> > "oops" if the given object said it actually is of type OBJ_BAD.
> > E.g. in your [2/4] patch:
> > 
> >         +static int mark_object(struct object *obj, int type, void *data)
> >         +{
> >         + ...
> >         +	if (type != OBJ_ANY && obj->type != type) {
> >         +		objerror(parent, "wrong object type in link");
> >         +	}
> > 
> > if you use the above #define, a tagged object that has a bad
> > type will pass this check unnoticed, won't it?
> 
> No, it wouldn't, as object->type is never initialized to OBJ_BAD:
> $ grep "OBJ_BAD" *.c *.h
> cache.h:        OBJ_BAD = -1,

Today that may be true.  OBJ_BAD was created for cases where the
internal object header parsing code in say sha1_file.c finds a type
code it doesn't recognize and wants to return it back to the caller.
This grew out of some of the early pack v4 work.

It actually happens today when a function that returns an object
type enum returns failure.  sha1_object_info() is one such function.
It returns "int" but that int is also really an enum object_type.
When that function fails it returns -1, which has it happens
is OBJ_BAD.

Subtle, yes.  But OBJ_BAD has meaning and is in use today.  Please do
not use it for "we don't care what it is".

-- 
Shawn.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  8:02         ` Junio C Hamano
@ 2008-02-25  8:06           ` Martin Koegler
  2008-02-25  8:12             ` Shawn O. Pearce
  0 siblings, 1 reply; 19+ messages in thread
From: Martin Koegler @ 2008-02-25  8:06 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn O. Pearce, git

On Mon, Feb 25, 2008 at 12:02:15AM -0800, Junio C Hamano wrote:
> mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:
> 
> > What about
> > #define OBJ_BAD -2
> 
> You mean "#define OBJ_ANY -2"?

Yes.

Should it go into cache.h or should it stay in fsck.h?

mfg Martin Kögler

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:46   ` Martin Koegler
  2008-02-25  7:59     ` Junio C Hamano
@ 2008-02-25  8:10     ` Shawn O. Pearce
  1 sibling, 0 replies; 19+ messages in thread
From: Shawn O. Pearce @ 2008-02-25  8:10 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Junio C Hamano, git

Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
> On Sun, Feb 24, 2008 at 07:08:39PM -0800, Junio C Hamano wrote:
> > 
> > I'd rather see a series does not depend on things in next that
> > you do not have to depend on, pretty please?
> 
> I usually develop my patch on next. I can offer you two things:
> * base my patches on something different (master?)
> * add fsck.h/o some lines above
> 
> What do you prefer?

Don't build patch series against "next" unless they will trivially
apply *and function correctly* also on "master".  E.g. a trivially
obvious one liner may be OK to develop on "next", but everything
else should be done against "master", or if you must the tip of the
topic in "next" that you desire to build upon.

E.g. when I was working on the builtin-fetch debugging and I was
building on top of that topic in next, not all of next itself.
 
> > For example, I do not think you can use this to mark reachable
> > objects.  Even if you find error walking the first parent
> > history, you would want to still mark a healthy second parent
> > history reachable.
> 
> How should I define the return value of fsck_walk in the presence of
> multiple errors?
> 
> It would not be necessary for all my users:
> 
> * in unpack-object and index-pack (I'll send an updated patch in the
>   next days), any error means that we can abort. Further checking would
>   mean wasting of resources.
> 
> * in fsck (patch 2) the error is signaled by the errors_found variable, so
>   all callbacks can return 0, even in the case of an error. Checking the
>   return value of fsck_walk would mean duplicate error messages.

So the return value is more about "keep walking" than it is about an
error (or lack there of)?

-- 
Shawn.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  8:06           ` Martin Koegler
@ 2008-02-25  8:12             ` Shawn O. Pearce
  2008-02-25 17:35               ` Nicolas Pitre
  0 siblings, 1 reply; 19+ messages in thread
From: Shawn O. Pearce @ 2008-02-25  8:12 UTC (permalink / raw)
  To: Martin Koegler; +Cc: Junio C Hamano, git

Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
> On Mon, Feb 25, 2008 at 12:02:15AM -0800, Junio C Hamano wrote:
> > mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:
> > 
> > > What about
> > > #define OBJ_BAD -2
> > 
> > You mean "#define OBJ_ANY -2"?
> 
> Yes.
> 
> Should it go into cache.h or should it stay in fsck.h?

I'd rather see this declared at the end of enum object_type, as
essentially OBJ_MAX-1.  But Nico may have a different opinion.

As such it shouldn't need a numerical constant.  We don't encode
this constant into packfiles.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  7:59     ` Junio C Hamano
@ 2008-02-25  8:21       ` Martin Koegler
  0 siblings, 0 replies; 19+ messages in thread
From: Martin Koegler @ 2008-02-25  8:21 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Shawn O. Pearce, git

On Sun, Feb 24, 2008 at 11:59:15PM -0800, Junio C Hamano wrote:
> mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:
> > How should I define the return value of fsck_walk in the presence of
> > multiple errors?
> 
> Returning error is fine.  That is not what I was talking about.
> 
> I was talking about an early return in the code, that does not
> callback once you find an error.

That is not what I was talking about. 

We need to define the return code of fsck_walk in the case of multiple
errors. What error (of all possible) should it return?

What about this new behaviour:

Callback return values:
* 0 everything OK
* > 0 error, but continue
* < 0 error, but stop

The return value of fsck_walk would be:
* -1, if fsck_walk discoveres an error (eg. not parseable object)
* first callback return value < 0 (and stops further processing in this case)
* first callback return value > 0 (if not callback return value < 0)
* 0, if no errors are discovered

mfg Martin Kögler

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  8:12             ` Shawn O. Pearce
@ 2008-02-25 17:35               ` Nicolas Pitre
  0 siblings, 0 replies; 19+ messages in thread
From: Nicolas Pitre @ 2008-02-25 17:35 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Martin Koegler, Junio C Hamano, git

On Mon, 25 Feb 2008, Shawn O. Pearce wrote:

> Martin Koegler <mkoegler@auto.tuwien.ac.at> wrote:
> > On Mon, Feb 25, 2008 at 12:02:15AM -0800, Junio C Hamano wrote:
> > > mkoegler@auto.tuwien.ac.at (Martin Koegler) writes:
> > > 
> > > > What about
> > > > #define OBJ_BAD -2
> > > 
> > > You mean "#define OBJ_ANY -2"?
> > 
> > Yes.
> > 
> > Should it go into cache.h or should it stay in fsck.h?
> 
> I'd rather see this declared at the end of enum object_type, as
> essentially OBJ_MAX-1.  But Nico may have a different opinion.

Why not alias OBJ_ANY to OBJ_NONE instead?


Nicolas

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/4] add generic, type aware object chain walker
  2008-02-25  8:04         ` Shawn O. Pearce
@ 2008-02-25 17:49           ` Nicolas Pitre
  0 siblings, 0 replies; 19+ messages in thread
From: Nicolas Pitre @ 2008-02-25 17:49 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: Martin Koegler, Junio C Hamano, git

On Mon, 25 Feb 2008, Shawn O. Pearce wrote:

> > No, it wouldn't, as object->type is never initialized to OBJ_BAD:
> > $ grep "OBJ_BAD" *.c *.h
> > cache.h:        OBJ_BAD = -1,
> 
> Today that may be true.  OBJ_BAD was created for cases where the
> internal object header parsing code in say sha1_file.c finds a type
> code it doesn't recognize and wants to return it back to the caller.
> This grew out of some of the early pack v4 work.
> 
> It actually happens today when a function that returns an object
> type enum returns failure.  sha1_object_info() is one such function.
> It returns "int" but that int is also really an enum object_type.
> When that function fails it returns -1, which has it happens
> is OBJ_BAD.
> 
> Subtle, yes.  But OBJ_BAD has meaning and is in use today.  Please do
> not use it for "we don't care what it is".

Actually, the reason for OBJ_BAD is possibly even more subtle than that.  
It is "documented" in commit fef742c4ed though.

The idea is for some function returning an object type to also be able 
to return an error condition without the overhead and/or hassle of a 
separate argument.  So the object space is returned as a positive value, 
and error space is negative.  The only purpose of OBJ_BAD was to make 
sure object_type is a signed type in order to avoid issues with 
compilers which might interpret the C standard in a way that would 
reduce the enum to its minimal expression such as some "signedlessness".


Nicolas

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2008-02-25 17:50 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-24 14:43 [PATCH 1/4] add generic, type aware object chain walker Martin Koegler
2008-02-24 14:43 ` [PATCH 2/4] builtin-fsck: move away from object-refs Martin Koegler
2008-02-24 14:43   ` [PATCH 3/4] Remove unused object-ref code Martin Koegler
2008-02-24 14:43     ` [PATCH 4/4] builtin-fsck: reports missing parent commits Martin Koegler
2008-02-25  3:04 ` [PATCH 1/4] add generic, type aware object chain walker Shawn O. Pearce
2008-02-25  7:26   ` Martin Koegler
2008-02-25  7:37     ` Junio C Hamano
2008-02-25  7:52       ` Martin Koegler
2008-02-25  8:02         ` Junio C Hamano
2008-02-25  8:06           ` Martin Koegler
2008-02-25  8:12             ` Shawn O. Pearce
2008-02-25 17:35               ` Nicolas Pitre
2008-02-25  8:04         ` Shawn O. Pearce
2008-02-25 17:49           ` Nicolas Pitre
2008-02-25  3:08 ` Junio C Hamano
2008-02-25  7:46   ` Martin Koegler
2008-02-25  7:59     ` Junio C Hamano
2008-02-25  8:21       ` Martin Koegler
2008-02-25  8:10     ` Shawn O. Pearce

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).