git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Michael Haggerty <mhagger@alum.mit.edu>,
	Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk
Date: Wed, 15 Oct 2014 18:37:28 -0400	[thread overview]
Message-ID: <20141015223728.GF25630@peff.net> (raw)
In-Reply-To: <20141015223244.GA25368@peff.net>

To find the set of reachable objects, we add a bunch of
possible sources to our rev_info, call prepare_revision_walk,
and then launch into a custom walker that handles each
object top. This is a subset of what traverse_commit_list
does, so we can just reuse that code (it can also handle
more complex cases like UNINTERESTING commits and pathspecs,
but we don't use those features).

Signed-off-by: Jeff King <peff@peff.net>
---
I was concerned this would be slower because traverse_commit_list is
more featureful. To my surprise, it was consistently about 3-4% faster!
The major difference is that traverse_commit_list will hit all of the
commits first, and then the trees. For reachability that doesn't matter
either way, but I suspect the new way has slightly better cache
locality, leading to the minor speedup.

 reachable.c | 130 ++++++++----------------------------------------------------
 1 file changed, 17 insertions(+), 113 deletions(-)

diff --git a/reachable.c b/reachable.c
index 6f6835b..02bf6c2 100644
--- a/reachable.c
+++ b/reachable.c
@@ -8,6 +8,7 @@
 #include "reachable.h"
 #include "cache-tree.h"
 #include "progress.h"
+#include "list-objects.h"
 
 struct connectivity_progress {
 	struct progress *progress;
@@ -21,118 +22,6 @@ static void update_progress(struct connectivity_progress *cp)
 		display_progress(cp->progress, cp->count);
 }
 
-static void process_blob(struct blob *blob,
-			 struct object_array *p,
-			 struct name_path *path,
-			 const char *name,
-			 struct connectivity_progress *cp)
-{
-	struct object *obj = &blob->object;
-
-	if (!blob)
-		die("bad blob object");
-	if (obj->flags & SEEN)
-		return;
-	obj->flags |= SEEN;
-	update_progress(cp);
-	/* Nothing to do, really .. The blob lookup was the important part */
-}
-
-static void process_gitlink(const unsigned char *sha1,
-			    struct object_array *p,
-			    struct name_path *path,
-			    const char *name)
-{
-	/* I don't think we want to recurse into this, really. */
-}
-
-static void process_tree(struct tree *tree,
-			 struct object_array *p,
-			 struct name_path *path,
-			 const char *name,
-			 struct connectivity_progress *cp)
-{
-	struct object *obj = &tree->object;
-	struct tree_desc desc;
-	struct name_entry entry;
-	struct name_path me;
-
-	if (!tree)
-		die("bad tree object");
-	if (obj->flags & SEEN)
-		return;
-	obj->flags |= SEEN;
-	update_progress(cp);
-	if (parse_tree(tree) < 0)
-		die("bad tree object %s", sha1_to_hex(obj->sha1));
-	add_object(obj, p, path, name);
-	me.up = path;
-	me.elem = name;
-	me.elem_len = strlen(name);
-
-	init_tree_desc(&desc, tree->buffer, tree->size);
-
-	while (tree_entry(&desc, &entry)) {
-		if (S_ISDIR(entry.mode))
-			process_tree(lookup_tree(entry.sha1), p, &me, entry.path, cp);
-		else if (S_ISGITLINK(entry.mode))
-			process_gitlink(entry.sha1, p, &me, entry.path);
-		else
-			process_blob(lookup_blob(entry.sha1), p, &me, entry.path, cp);
-	}
-	free_tree_buffer(tree);
-}
-
-static void process_tag(struct tag *tag, struct object_array *p,
-			const char *name, struct connectivity_progress *cp)
-{
-	struct object *obj = &tag->object;
-
-	if (obj->flags & SEEN)
-		return;
-	obj->flags |= SEEN;
-	update_progress(cp);
-
-	if (parse_tag(tag) < 0)
-		die("bad tag object %s", sha1_to_hex(obj->sha1));
-	if (tag->tagged)
-		add_object(tag->tagged, p, NULL, name);
-}
-
-static void walk_commit_list(struct rev_info *revs,
-			     struct connectivity_progress *cp)
-{
-	int i;
-	struct commit *commit;
-	struct object_array objects = OBJECT_ARRAY_INIT;
-
-	/* Walk all commits, process their trees */
-	while ((commit = get_revision(revs)) != NULL) {
-		process_tree(commit->tree, &objects, NULL, "", cp);
-		update_progress(cp);
-	}
-
-	/* Then walk all the pending objects, recursively processing them too */
-	for (i = 0; i < revs->pending.nr; i++) {
-		struct object_array_entry *pending = revs->pending.objects + i;
-		struct object *obj = pending->item;
-		const char *name = pending->name;
-		if (obj->type == OBJ_TAG) {
-			process_tag((struct tag *) obj, &objects, name, cp);
-			continue;
-		}
-		if (obj->type == OBJ_TREE) {
-			process_tree((struct tree *)obj, &objects, NULL, name, cp);
-			continue;
-		}
-		if (obj->type == OBJ_BLOB) {
-			process_blob((struct blob *)obj, &objects, NULL, name, cp);
-			continue;
-		}
-		die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
-	}
-}
-
 static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
 		const char *email, unsigned long timestamp, int tz,
 		const char *message, void *cb_data)
@@ -210,6 +99,21 @@ static void add_cache_refs(struct rev_info *revs)
 		add_cache_tree(active_cache_tree, revs);
 }
 
+/*
+ * The traversal will have already marked us as SEEN, so we
+ * only need to handle any progress reporting here.
+ */
+static void mark_object(struct object *obj, const struct name_path *path,
+			const char *name, void *data)
+{
+	update_progress(data);
+}
+
+static void mark_commit(struct commit *c, void *data)
+{
+	mark_object(&c->object, NULL, NULL, data);
+}
+
 void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
 			    struct progress *progress)
 {
@@ -245,6 +149,6 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
 	 */
 	if (prepare_revision_walk(revs))
 		die("revision walk setup failed");
-	walk_commit_list(revs, &cp);
+	traverse_commit_list(revs, mark_commit, mark_object, &cp);
 	display_progress(cp.progress, cp.count);
 }
-- 
2.1.2.596.g7379948

  parent reply	other threads:[~2014-10-15 22:37 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-10-15 22:32 [PATCH v2 0/25] prune-safety Jeff King
2014-10-15 22:33 ` [PATCH v2 01/25] foreach_alt_odb: propagate return value from callback Jeff King
2014-10-15 22:34 ` [PATCH v2 02/25] isxdigit: cast input to unsigned char Jeff King
2014-10-16 17:16   ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 03/25] object_array: factor out slopbuf-freeing logic Jeff King
2014-10-16 17:39   ` Junio C Hamano
2014-10-17  0:33     ` git-bundle rev handling and de-duping Jeff King
2014-10-17 21:03       ` Philip Oakley
2014-10-17 22:41         ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 04/25] object_array: add a "clear" function Jeff King
2014-10-15 22:35 ` [PATCH v2 05/25] clean up name allocation in prepare_revision_walk Jeff King
2014-10-15 22:37 ` Jeff King [this message]
2014-10-16 17:53   ` [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk Junio C Hamano
2014-10-15 22:38 ` [PATCH v2 07/25] reachable: reuse revision.c "add all reflogs" code Jeff King
2014-10-15 22:38 ` [PATCH v2 08/25] prune: factor out loose-object directory traversal Jeff King
2014-10-15 22:40 ` [PATCH v2 09/25] reachable: mark index blobs as SEEN Jeff King
2014-10-15 22:40 ` [PATCH v2 10/25] prune-packed: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:40 ` [PATCH v2 11/25] count-objects: do not use xsize_t when counting object size Jeff King
2014-10-15 22:41 ` [PATCH v2 12/25] count-objects: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:41 ` [PATCH v2 13/25] sha1_file: add for_each iterators for loose and packed objects Jeff King
2014-10-15 22:41 ` [PATCH v2 14/25] prune: keep objects reachable from recent objects Jeff King
2014-10-15 22:41 ` [PATCH v2 15/25] pack-objects: refactor unpack-unreachable expiration check Jeff King
2014-10-15 22:42 ` [PATCH v2 16/25] pack-objects: match prune logic for discarding objects Jeff King
2014-10-15 22:42 ` [PATCH v2 17/25] write_sha1_file: freshen existing objects Jeff King
2014-10-15 22:42 ` [PATCH v2 18/25] make add_object_array_with_context interface more sane Jeff King
2014-10-15 22:43 ` [PATCH v2 19/25] traverse_commit_list: support pending blobs/trees with paths Jeff King
2014-10-15 22:43 ` [PATCH v2 20/25] rev-list: document --reflog option Jeff King
2014-10-15 22:44 ` [PATCH v2 21/25] rev-list: add --index-objects option Jeff King
2014-10-16 18:41   ` Junio C Hamano
2014-10-17  0:12     ` Jeff King
2014-10-17  0:43       ` Jeff King
2014-10-17  0:44         ` [PATCH v3 22/26] rev-list: add --indexed-objects option Jeff King
2014-10-17  0:44         ` [PATCH v3 23/26] reachable: use revision machinery's --indexed-objects code Jeff King
2014-10-17  0:44         ` [PATCH v3 24/26] pack-objects: use argv_array Jeff King
2014-10-17  0:44         ` [PATCH v3 25/26] repack: pack objects mentioned by the index Jeff King
2014-10-17  0:44         ` [PATCH v3 26/26] pack-objects: double-check options before discarding objects Jeff King
2014-10-15 22:44 ` [PATCH v2 22/25] reachable: use revision machinery's --index-objects code Jeff King
2014-10-15 22:45 ` [PATCH v2 23/25] pack-objects: use argv_array Jeff King
2014-10-15 22:46 ` [PATCH v2 24/25] repack: pack objects mentioned by the index Jeff King
2014-10-15 22:48 ` [PATCH v2 25/25] pack-objects: double-check options before discarding objects Jeff King
2014-10-16 21:07 ` [PATCH v2 0/25] prune-safety Junio C Hamano
2014-10-16 21:10   ` Junio C Hamano
2014-10-16 21:21   ` Jeff King
2014-10-16 21:39     ` Jeff King
2014-10-16 22:18       ` Junio C Hamano
2014-10-17  0:03         ` Jeff King
     [not found]       ` <CAPc5daX0AFv9jDrFyd_OnupW5AfZW9Je_rgzaViX_xxs3SG5zg@mail.gmail.com>
2014-10-17  4:49         ` Jeff King
2014-10-18 12:31       ` Jeff King

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20141015223728.GF25630@peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mhagger@alum.mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).