From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Michael Haggerty <mhagger@alum.mit.edu>,
Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk
Date: Wed, 15 Oct 2014 18:37:28 -0400 [thread overview]
Message-ID: <20141015223728.GF25630@peff.net> (raw)
In-Reply-To: <20141015223244.GA25368@peff.net>
To find the set of reachable objects, we add a bunch of
possible sources to our rev_info, call prepare_revision_walk,
and then launch into a custom walker that handles each
object type. This is a subset of what traverse_commit_list
does, so we can just reuse that code (it can also handle
more complex cases like UNINTERESTING commits and pathspecs,
but we don't use those features).
Signed-off-by: Jeff King <peff@peff.net>
---
I was concerned this would be slower because traverse_commit_list is
more featureful. To my surprise, it was consistently about 3-4% faster!
The major difference is that traverse_commit_list will hit all of the
commits first, and then the trees. For reachability that doesn't matter
either way, but I suspect the new way has slightly better cache
locality, leading to the minor speedup.
reachable.c | 130 ++++++++----------------------------------------------------
1 file changed, 17 insertions(+), 113 deletions(-)
diff --git a/reachable.c b/reachable.c
index 6f6835b..02bf6c2 100644
--- a/reachable.c
+++ b/reachable.c
@@ -8,6 +8,7 @@
#include "reachable.h"
#include "cache-tree.h"
#include "progress.h"
+#include "list-objects.h"
struct connectivity_progress {
struct progress *progress;
@@ -21,118 +22,6 @@ static void update_progress(struct connectivity_progress *cp)
display_progress(cp->progress, cp->count);
}
-static void process_blob(struct blob *blob,
- struct object_array *p,
- struct name_path *path,
- const char *name,
- struct connectivity_progress *cp)
-{
- struct object *obj = &blob->object;
-
- if (!blob)
- die("bad blob object");
- if (obj->flags & SEEN)
- return;
- obj->flags |= SEEN;
- update_progress(cp);
- /* Nothing to do, really .. The blob lookup was the important part */
-}
-
-static void process_gitlink(const unsigned char *sha1,
- struct object_array *p,
- struct name_path *path,
- const char *name)
-{
- /* I don't think we want to recurse into this, really. */
-}
-
-static void process_tree(struct tree *tree,
- struct object_array *p,
- struct name_path *path,
- const char *name,
- struct connectivity_progress *cp)
-{
- struct object *obj = &tree->object;
- struct tree_desc desc;
- struct name_entry entry;
- struct name_path me;
-
- if (!tree)
- die("bad tree object");
- if (obj->flags & SEEN)
- return;
- obj->flags |= SEEN;
- update_progress(cp);
- if (parse_tree(tree) < 0)
- die("bad tree object %s", sha1_to_hex(obj->sha1));
- add_object(obj, p, path, name);
- me.up = path;
- me.elem = name;
- me.elem_len = strlen(name);
-
- init_tree_desc(&desc, tree->buffer, tree->size);
-
- while (tree_entry(&desc, &entry)) {
- if (S_ISDIR(entry.mode))
- process_tree(lookup_tree(entry.sha1), p, &me, entry.path, cp);
- else if (S_ISGITLINK(entry.mode))
- process_gitlink(entry.sha1, p, &me, entry.path);
- else
- process_blob(lookup_blob(entry.sha1), p, &me, entry.path, cp);
- }
- free_tree_buffer(tree);
-}
-
-static void process_tag(struct tag *tag, struct object_array *p,
- const char *name, struct connectivity_progress *cp)
-{
- struct object *obj = &tag->object;
-
- if (obj->flags & SEEN)
- return;
- obj->flags |= SEEN;
- update_progress(cp);
-
- if (parse_tag(tag) < 0)
- die("bad tag object %s", sha1_to_hex(obj->sha1));
- if (tag->tagged)
- add_object(tag->tagged, p, NULL, name);
-}
-
-static void walk_commit_list(struct rev_info *revs,
- struct connectivity_progress *cp)
-{
- int i;
- struct commit *commit;
- struct object_array objects = OBJECT_ARRAY_INIT;
-
- /* Walk all commits, process their trees */
- while ((commit = get_revision(revs)) != NULL) {
- process_tree(commit->tree, &objects, NULL, "", cp);
- update_progress(cp);
- }
-
- /* Then walk all the pending objects, recursively processing them too */
- for (i = 0; i < revs->pending.nr; i++) {
- struct object_array_entry *pending = revs->pending.objects + i;
- struct object *obj = pending->item;
- const char *name = pending->name;
- if (obj->type == OBJ_TAG) {
- process_tag((struct tag *) obj, &objects, name, cp);
- continue;
- }
- if (obj->type == OBJ_TREE) {
- process_tree((struct tree *)obj, &objects, NULL, name, cp);
- continue;
- }
- if (obj->type == OBJ_BLOB) {
- process_blob((struct blob *)obj, &objects, NULL, name, cp);
- continue;
- }
- die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
- }
-}
-
static int add_one_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
const char *email, unsigned long timestamp, int tz,
const char *message, void *cb_data)
@@ -210,6 +99,21 @@ static void add_cache_refs(struct rev_info *revs)
add_cache_tree(active_cache_tree, revs);
}
+/*
+ * The traversal will have already marked us as SEEN, so we
+ * only need to handle any progress reporting here.
+ */
+static void mark_object(struct object *obj, const struct name_path *path,
+ const char *name, void *data)
+{
+ update_progress(data);
+}
+
+static void mark_commit(struct commit *c, void *data)
+{
+ mark_object(&c->object, NULL, NULL, data);
+}
+
void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
struct progress *progress)
{
@@ -245,6 +149,6 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog,
*/
if (prepare_revision_walk(revs))
die("revision walk setup failed");
- walk_commit_list(revs, &cp);
+ traverse_commit_list(revs, mark_commit, mark_object, &cp);
display_progress(cp.progress, cp.count);
}
--
2.1.2.596.g7379948
next prev parent reply other threads:[~2014-10-15 22:37 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-10-15 22:32 [PATCH v2 0/25] prune-safety Jeff King
2014-10-15 22:33 ` [PATCH v2 01/25] foreach_alt_odb: propagate return value from callback Jeff King
2014-10-15 22:34 ` [PATCH v2 02/25] isxdigit: cast input to unsigned char Jeff King
2014-10-16 17:16 ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 03/25] object_array: factor out slopbuf-freeing logic Jeff King
2014-10-16 17:39 ` Junio C Hamano
2014-10-17 0:33 ` git-bundle rev handling and de-duping Jeff King
2014-10-17 21:03 ` Philip Oakley
2014-10-17 22:41 ` Junio C Hamano
2014-10-15 22:34 ` [PATCH v2 04/25] object_array: add a "clear" function Jeff King
2014-10-15 22:35 ` [PATCH v2 05/25] clean up name allocation in prepare_revision_walk Jeff King
2014-10-15 22:37 ` Jeff King [this message]
2014-10-16 17:53 ` [PATCH v2 06/25] reachable: use traverse_commit_list instead of custom walk Junio C Hamano
2014-10-15 22:38 ` [PATCH v2 07/25] reachable: reuse revision.c "add all reflogs" code Jeff King
2014-10-15 22:38 ` [PATCH v2 08/25] prune: factor out loose-object directory traversal Jeff King
2014-10-15 22:40 ` [PATCH v2 09/25] reachable: mark index blobs as SEEN Jeff King
2014-10-15 22:40 ` [PATCH v2 10/25] prune-packed: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:40 ` [PATCH v2 11/25] count-objects: do not use xsize_t when counting object size Jeff King
2014-10-15 22:41 ` [PATCH v2 12/25] count-objects: use for_each_loose_file_in_objdir Jeff King
2014-10-15 22:41 ` [PATCH v2 13/25] sha1_file: add for_each iterators for loose and packed objects Jeff King
2014-10-15 22:41 ` [PATCH v2 14/25] prune: keep objects reachable from recent objects Jeff King
2014-10-15 22:41 ` [PATCH v2 15/25] pack-objects: refactor unpack-unreachable expiration check Jeff King
2014-10-15 22:42 ` [PATCH v2 16/25] pack-objects: match prune logic for discarding objects Jeff King
2014-10-15 22:42 ` [PATCH v2 17/25] write_sha1_file: freshen existing objects Jeff King
2014-10-15 22:42 ` [PATCH v2 18/25] make add_object_array_with_context interface more sane Jeff King
2014-10-15 22:43 ` [PATCH v2 19/25] traverse_commit_list: support pending blobs/trees with paths Jeff King
2014-10-15 22:43 ` [PATCH v2 20/25] rev-list: document --reflog option Jeff King
2014-10-15 22:44 ` [PATCH v2 21/25] rev-list: add --index-objects option Jeff King
2014-10-16 18:41 ` Junio C Hamano
2014-10-17 0:12 ` Jeff King
2014-10-17 0:43 ` Jeff King
2014-10-17 0:44 ` [PATCH v3 22/26] rev-list: add --indexed-objects option Jeff King
2014-10-17 0:44 ` [PATCH v3 23/26] reachable: use revision machinery's --indexed-objects code Jeff King
2014-10-17 0:44 ` [PATCH v3 24/26] pack-objects: use argv_array Jeff King
2014-10-17 0:44 ` [PATCH v3 25/26] repack: pack objects mentioned by the index Jeff King
2014-10-17 0:44 ` [PATCH v3 26/26] pack-objects: double-check options before discarding objects Jeff King
2014-10-15 22:44 ` [PATCH v2 22/25] reachable: use revision machinery's --index-objects code Jeff King
2014-10-15 22:45 ` [PATCH v2 23/25] pack-objects: use argv_array Jeff King
2014-10-15 22:46 ` [PATCH v2 24/25] repack: pack objects mentioned by the index Jeff King
2014-10-15 22:48 ` [PATCH v2 25/25] pack-objects: double-check options before discarding objects Jeff King
2014-10-16 21:07 ` [PATCH v2 0/25] prune-safety Junio C Hamano
2014-10-16 21:10 ` Junio C Hamano
2014-10-16 21:21 ` Jeff King
2014-10-16 21:39 ` Jeff King
2014-10-16 22:18 ` Junio C Hamano
2014-10-17 0:03 ` Jeff King
[not found] ` <CAPc5daX0AFv9jDrFyd_OnupW5AfZW9Je_rgzaViX_xxs3SG5zg@mail.gmail.com>
2014-10-17 4:49 ` Jeff King
2014-10-18 12:31 ` Jeff King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141015223728.GF25630@peff.net \
--to=peff@peff.net \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=mhagger@alum.mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).