* [PATCH 2/3] revision: introduce rev_walk_mode to clarify get_revision_1()
2026-05-27 15:49 [PATCH 0/3] revision: use priority queue for streaming walks Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` [PATCH 1/3] pack-objects: call release_revisions() after cruft traversal Kristofer Karlsson via GitGitGadget
@ 2026-05-27 15:50 ` Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` [PATCH 3/3] revision: use priority queue for non-limited streaming walks Kristofer Karlsson via GitGitGadget
2 siblings, 0 replies; 4+ messages in thread
From: Kristofer Karlsson via GitGitGadget @ 2026-05-27 15:50 UTC (permalink / raw)
To: git; +Cc: Kristofer Karlsson, Kristofer Karlsson
From: Kristofer Karlsson <krka@spotify.com>
get_revision_1() dispatches to different walk strategies based on a
combination of rev_info flags: reflog_info, topo_walk_info, and
limited. These conditions are checked in multiple places within
the function -- once to select the next commit, and again to decide
how to expand parents -- and the two chains must stay in sync.
Extract the mode selection into a rev_walk_mode enum and a small
get_walk_mode() helper, resolved once at the top of get_revision_1().
Both dispatch sites now switch on the same mode variable, making it
obvious that they agree and easier to verify that all modes are
handled.
No functional change.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
revision.c | 62 ++++++++++++++++++++++++++++++++++++++++++------------
1 file changed, 48 insertions(+), 14 deletions(-)
diff --git a/revision.c b/revision.c
index e1970b9c5d..9d0fc696d0 100644
--- a/revision.c
+++ b/revision.c
@@ -4327,22 +4327,48 @@ static void track_linear(struct rev_info *revs, struct commit *commit)
revs->previous_parents = commit_list_copy(commit->parents);
}
+enum rev_walk_mode {
+ REV_WALK_REFLOG,
+ REV_WALK_TOPO,
+ REV_WALK_LIMITED,
+ REV_WALK_STREAMING,
+};
+
+static enum rev_walk_mode get_walk_mode(struct rev_info *revs)
+{
+ if (revs->reflog_info)
+ return REV_WALK_REFLOG;
+ if (revs->topo_walk_info)
+ return REV_WALK_TOPO;
+ if (revs->limited)
+ return REV_WALK_LIMITED;
+ return REV_WALK_STREAMING;
+}
+
static struct commit *get_revision_1(struct rev_info *revs)
{
+ enum rev_walk_mode mode = get_walk_mode(revs);
+
while (1) {
struct commit *commit;
- if (revs->reflog_info)
+ switch (mode) {
+ case REV_WALK_REFLOG:
commit = next_reflog_entry(revs->reflog_info);
- else if (revs->topo_walk_info)
+ break;
+ case REV_WALK_TOPO:
commit = next_topo_commit(revs);
- else
+ break;
+ case REV_WALK_LIMITED:
+ case REV_WALK_STREAMING:
commit = pop_commit(&revs->commits);
+ break;
+ }
if (!commit)
return NULL;
- if (revs->reflog_info)
+ if (mode == REV_WALK_REFLOG)
commit->object.flags &= ~(ADDED | SEEN | SHOWN);
/*
@@ -4350,20 +4376,28 @@ static struct commit *get_revision_1(struct rev_info *revs)
* the parents here. We also need to do the date-based limiting
* that we'd otherwise have done in limit_list().
*/
- if (!revs->limited) {
- if (revs->max_age != -1 &&
- comparison_date(revs, commit) < revs->max_age)
- continue;
+ if (mode != REV_WALK_LIMITED &&
+ revs->max_age != -1 &&
+ comparison_date(revs, commit) < revs->max_age)
+ continue;
- if (revs->reflog_info)
- try_to_simplify_commit(revs, commit);
- else if (revs->topo_walk_info)
- expand_topo_walk(revs, commit);
- else if (process_parents(revs, commit, &revs->commits, NULL) < 0) {
+ switch (mode) {
+ case REV_WALK_REFLOG:
+ try_to_simplify_commit(revs, commit);
+ break;
+ case REV_WALK_TOPO:
+ expand_topo_walk(revs, commit);
+ break;
+ case REV_WALK_STREAMING:
+ if (process_parents(revs, commit,
+ &revs->commits, NULL) < 0) {
if (!revs->ignore_missing_links)
die("Failed to traverse parents of commit %s",
- oid_to_hex(&commit->object.oid));
+ oid_to_hex(&commit->object.oid));
}
+ break;
+ case REV_WALK_LIMITED:
+ break;
}
switch (simplify_commit(revs, commit)) {
--
gitgitgadget
^ permalink raw reply related [flat|nested] 4+ messages in thread* [PATCH 3/3] revision: use priority queue for non-limited streaming walks
2026-05-27 15:49 [PATCH 0/3] revision: use priority queue for streaming walks Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` [PATCH 1/3] pack-objects: call release_revisions() after cruft traversal Kristofer Karlsson via GitGitGadget
2026-05-27 15:50 ` [PATCH 2/3] revision: introduce rev_walk_mode to clarify get_revision_1() Kristofer Karlsson via GitGitGadget
@ 2026-05-27 15:50 ` Kristofer Karlsson via GitGitGadget
2 siblings, 0 replies; 4+ messages in thread
From: Kristofer Karlsson via GitGitGadget @ 2026-05-27 15:50 UTC (permalink / raw)
To: git; +Cc: Kristofer Karlsson, Kristofer Karlsson
From: Kristofer Karlsson <krka@spotify.com>
The streaming (non-limited) walk in get_revision_1() inserts newly
discovered parent commits into a date-sorted queue via
commit_list_insert_by_date(), which scans the linked list to find the
insertion point -- O(w) per insert, where w is the width of the active
walk frontier. Replace this with an O(log w) priority queue.
Add a commit_queue field to rev_info alongside the existing commits
linked list. The two representations are mutually exclusive: setup
and external callers that need list access use the linked list, then
get_revision_1() lazily drains it into the priority queue on first
call. Add a REV_WALK_NO_WALK enum value to distinguish the no_walk
case (which still uses the commit list) from the streaming case.
The conversion function rev_info_commit_list_to_queue() is public so
callers that know they will iterate can convert early.
Combined with the limit_list() priority queue change already in
master, this eliminates all O(w) sorted linked-list insertion from
the revision walk machinery.
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
commit.c | 13 -------------
commit.h | 2 --
revision.c | 55 +++++++++++++++++++++++++++++-------------------------
revision.h | 12 +++++++++++-
4 files changed, 41 insertions(+), 41 deletions(-)
diff --git a/commit.c b/commit.c
index e3e7352e69..5112c7b2af 100644
--- a/commit.c
+++ b/commit.c
@@ -729,19 +729,6 @@ void commit_list_free(struct commit_list *list)
pop_commit(&list);
}
-struct commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list)
-{
- struct commit_list **pp = list;
- struct commit_list *p;
- while ((p = *pp) != NULL) {
- if (p->item->date < item->date) {
- break;
- }
- pp = &p->next;
- }
- return commit_list_insert(item, pp);
-}
-
static int commit_list_compare_by_date(const struct commit_list *a,
const struct commit_list *b)
{
diff --git a/commit.h b/commit.h
index 58150045af..385492fbb1 100644
--- a/commit.h
+++ b/commit.h
@@ -191,8 +191,6 @@ int commit_list_contains(struct commit *item,
struct commit_list **commit_list_append(struct commit *commit,
struct commit_list **next);
unsigned commit_list_count(const struct commit_list *l);
-struct commit_list *commit_list_insert_by_date(struct commit *item,
- struct commit_list **list);
void commit_list_sort_by_date(struct commit_list **list);
/* Shallow copy of the input list */
diff --git a/revision.c b/revision.c
index 9d0fc696d0..4bb3b16e43 100644
--- a/revision.c
+++ b/revision.c
@@ -1116,7 +1116,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
}
static int process_parents(struct rev_info *revs, struct commit *commit,
- struct commit_list **list, struct prio_queue *queue)
+ struct prio_queue *queue)
{
struct commit_list *parent = commit->parents;
unsigned pass_flags;
@@ -1158,8 +1158,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
if (p->object.flags & SEEN)
continue;
p->object.flags |= (SEEN | NOT_USER_GIVEN);
- if (list)
- commit_list_insert_by_date(p, list);
if (queue)
prio_queue_put(queue, p);
if (revs->exclude_first_parent_only)
@@ -1207,8 +1205,6 @@ static int process_parents(struct rev_info *revs, struct commit *commit,
p->object.flags |= pass_flags | CHILD_VISITED;
if (!(p->object.flags & SEEN)) {
p->object.flags |= (SEEN | NOT_USER_GIVEN);
- if (list)
- commit_list_insert_by_date(p, list);
if (queue)
prio_queue_put(queue, p);
}
@@ -1470,7 +1466,7 @@ static int limit_list(struct rev_info *revs)
if (revs->max_age != -1 && (commit->date < revs->max_age))
obj->flags |= UNINTERESTING;
- if (process_parents(revs, commit, NULL, &queue) < 0) {
+ if (process_parents(revs, commit, &queue) < 0) {
clear_prio_queue(&queue);
return -1;
}
@@ -3257,6 +3253,7 @@ static void free_void_commit_list(void *list)
void release_revisions(struct rev_info *revs)
{
commit_list_free(revs->commits);
+ clear_prio_queue(&revs->commit_queue);
commit_list_free(revs->ancestry_path_bottoms);
release_display_notes(&revs->notes_opt);
object_array_clear(&revs->pending);
@@ -3726,7 +3723,7 @@ static void explore_walk_step(struct rev_info *revs)
if (revs->max_age != -1 && (c->date < revs->max_age))
c->object.flags |= UNINTERESTING;
- if (process_parents(revs, c, NULL, NULL) < 0)
+ if (process_parents(revs, c, NULL) < 0)
return;
if (c->object.flags & UNINTERESTING)
@@ -3902,7 +3899,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
{
struct commit_list *p;
struct topo_walk_info *info = revs->topo_walk_info;
- if (process_parents(revs, commit, NULL, NULL) < 0) {
+ if (process_parents(revs, commit, NULL) < 0) {
if (!revs->ignore_missing_links)
die("Failed to traverse parents of commit %s",
oid_to_hex(&commit->object.oid));
@@ -3938,6 +3935,13 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
}
}
+void rev_info_commit_list_to_queue(struct rev_info *revs)
+{
+ while (revs->commits)
+ prio_queue_put(&revs->commit_queue, pop_commit(&revs->commits));
+}
+
+
int prepare_revision_walk(struct rev_info *revs)
{
int i;
@@ -4006,7 +4010,7 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
for (;;) {
struct commit *p = *pp;
if (!revs->limited)
- if (process_parents(revs, p, NULL, queue) < 0)
+ if (process_parents(revs, p, queue) < 0)
return rewrite_one_error;
if (p->object.flags & UNINTERESTING)
return rewrite_one_ok;
@@ -4020,27 +4024,18 @@ static enum rewrite_result rewrite_one_1(struct rev_info *revs,
}
}
-static void merge_queue_into_list(struct prio_queue *q, struct commit_list **list)
+static void merge_queue_into_prio_queue(struct prio_queue *from,
+ struct prio_queue *to)
{
- while (q->nr) {
- struct commit *item = prio_queue_peek(q);
- struct commit_list *p = *list;
-
- if (p && p->item->date >= item->date)
- list = &p->next;
- else {
- p = commit_list_insert(item, list);
- list = &p->next; /* skip newly added item */
- prio_queue_get(q); /* pop item */
- }
- }
+ while (from->nr)
+ prio_queue_put(to, prio_queue_get(from));
}
static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp)
{
struct prio_queue queue = { compare_commits_by_commit_date };
enum rewrite_result ret = rewrite_one_1(revs, pp, &queue);
- merge_queue_into_list(&queue, &revs->commits);
+ merge_queue_into_prio_queue(&queue, &revs->commit_queue);
clear_prio_queue(&queue);
return ret;
}
@@ -4331,6 +4326,7 @@ enum rev_walk_mode {
REV_WALK_REFLOG,
REV_WALK_TOPO,
REV_WALK_LIMITED,
+ REV_WALK_NO_WALK,
REV_WALK_STREAMING,
};
@@ -4342,6 +4338,8 @@ static enum rev_walk_mode get_walk_mode(struct rev_info *revs)
return REV_WALK_TOPO;
if (revs->limited)
return REV_WALK_LIMITED;
+ if (revs->no_walk)
+ return REV_WALK_NO_WALK;
return REV_WALK_STREAMING;
}
@@ -4349,6 +4347,9 @@ static struct commit *get_revision_1(struct rev_info *revs)
{
enum rev_walk_mode mode = get_walk_mode(revs);
+ if (mode == REV_WALK_STREAMING && revs->commits)
+ rev_info_commit_list_to_queue(revs);
+
while (1) {
struct commit *commit;
@@ -4360,9 +4361,12 @@ static struct commit *get_revision_1(struct rev_info *revs)
commit = next_topo_commit(revs);
break;
case REV_WALK_LIMITED:
- case REV_WALK_STREAMING:
+ case REV_WALK_NO_WALK:
commit = pop_commit(&revs->commits);
break;
+ case REV_WALK_STREAMING:
+ commit = prio_queue_get(&revs->commit_queue);
+ break;
}
if (!commit)
@@ -4390,12 +4394,13 @@ static struct commit *get_revision_1(struct rev_info *revs)
break;
case REV_WALK_STREAMING:
if (process_parents(revs, commit,
- &revs->commits, NULL) < 0) {
+ &revs->commit_queue) < 0) {
if (!revs->ignore_missing_links)
die("Failed to traverse parents of commit %s",
oid_to_hex(&commit->object.oid));
}
break;
+ case REV_WALK_NO_WALK:
case REV_WALK_LIMITED:
break;
}
diff --git a/revision.h b/revision.h
index 584f1338b5..04982a3d47 100644
--- a/revision.h
+++ b/revision.h
@@ -12,6 +12,7 @@
#include "decorate.h"
#include "ident.h"
#include "list-objects-filter-options.h"
+#include "prio-queue.h"
#include "strvec.h"
/**
@@ -122,8 +123,14 @@ struct oidset;
struct topo_walk_info;
struct rev_info {
- /* Starting list */
+ /*
+ * Work queue of commits, stored as either a linked list or a
+ * priority queue, but never both at the same time.
+ * rev_info_commit_list_to_queue() converts list to queue.
+ */
struct commit_list *commits;
+ struct prio_queue commit_queue;
+
struct object_array pending;
struct repository *repo;
@@ -400,6 +407,7 @@ struct rev_info {
* uninitialized.
*/
#define REV_INFO_INIT { \
+ .commit_queue = { .compare = compare_commits_by_commit_date }, \
.abbrev = DEFAULT_ABBREV, \
.simplify_history = 1, \
.pruning.flags.recursive = 1, \
@@ -478,6 +486,8 @@ void reset_revision_walk(void);
*/
int prepare_revision_walk(struct rev_info *revs);
+/* Drain the commits linked list into the priority queue. */
+void rev_info_commit_list_to_queue(struct rev_info *revs);
/**
* Takes a pointer to a `rev_info` structure and iterates over it, returning a
* `struct commit *` each time you call it. The end of the revision list is
--
gitgitgadget
^ permalink raw reply related [flat|nested] 4+ messages in thread