From: "Kristofer Karlsson via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Derrick Stolee <stolee@gmail.com>, Jeff King <peff@peff.net>,
Kristofer Karlsson <krka@spotify.com>,
Kristofer Karlsson <krka@spotify.com>,
Kristofer Karlsson <krka@spotify.com>
Subject: [PATCH v2 3/3] commit-reach: replace queue_has_nonstale() scan with O(1) tracking
Date: Mon, 25 May 2026 14:28:05 +0000 [thread overview]
Message-ID: <03771eb34c3ef1a896f5a63b4247b0a79f1589bb.1779719286.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.2124.v2.git.1779719286.gitgitgadget@gmail.com>
From: Kristofer Karlsson <krka@spotify.com>
paint_down_to_common() and ahead_behind() call queue_has_nonstale()
on every iteration to decide whether to continue the walk.
queue_has_nonstale() performs a linear scan of the priority queue,
making the overall walk O(n*m) where n is the number of commits
walked and m is the queue size.
Introduce 'struct nonstale_queue', a thin wrapper around prio_queue
that maintains a 'max_nonstale' pointer — the lowest-priority
(oldest) non-stale commit seen so far. When this commit is popped,
every remaining queue entry is known to be stale, so the walk can
stop. This reduces the per-iteration termination check from O(m)
to O(1).
Uses <= 0 (not < 0) when comparing priorities so that among distinct
commits with equal priority (same generation and timestamp) the
last-enqueued one is tracked. Since prio_queue breaks ties by
insertion order, this ensures max_nonstale is always the last in its
priority class to be popped, making pointer equality on pop
sufficient for correctness.
The previous commit's ENQUEUED deduplication guarantees each commit
appears at most once in the queue, which is required for the pointer
equality check to be unambiguous.
On a large monorepo (3.7M commits), this yields ~2x end-to-end
speedup for merge-base calculations on deep import branches.
Profiling shows paint_down_to_common() drops from 50% to 4% of
total runtime (~27x faster), with the remaining time in commit
graph lookups and heap operations:
Before: 8536ms / 5757ms / 4743ms (three test cases)
After: 3956ms / 4383ms / 1927ms
Suggested-by: Jeff King <peff@peff.net>
Signed-off-by: Kristofer Karlsson <krka@spotify.com>
---
commit-reach.c | 96 ++++++++++++++++++++++++++++++++++----------------
1 file changed, 65 insertions(+), 31 deletions(-)
diff --git a/commit-reach.c b/commit-reach.c
index 85583ae359..b5328a804c 100644
--- a/commit-reach.c
+++ b/commit-reach.c
@@ -40,32 +40,62 @@ static int compare_commits_by_gen(const void *_a, const void *_b)
return 0;
}
-static void prio_queue_put_dedup(struct prio_queue *queue, struct commit *c)
+/*
+ * A prio_queue with O(1) termination check. 'max_nonstale' tracks
+ * the lowest-priority non-stale commit enqueued so far; once it is
+ * popped, every remaining entry is known to be STALE.
+ */
+struct nonstale_queue {
+ struct prio_queue pq;
+ struct commit *max_nonstale;
+};
+
+static void nonstale_queue_put(struct nonstale_queue *queue,
+ struct commit *c)
+{
+ struct commit *old = queue->max_nonstale;
+
+ prio_queue_put(&queue->pq, c);
+ if (c->object.flags & STALE)
+ return;
+ if (!old || queue->pq.compare(old, c, queue->pq.cb_data) <= 0)
+ queue->max_nonstale = c;
+}
+
+static struct commit *nonstale_queue_get(struct nonstale_queue *queue)
+{
+ struct commit *commit = prio_queue_get(&queue->pq);
+
+ if (commit == queue->max_nonstale)
+ queue->max_nonstale = NULL;
+
+ return commit;
+}
+
+static void clear_nonstale_queue(struct nonstale_queue *queue)
+{
+ clear_prio_queue(&queue->pq);
+ queue->max_nonstale = NULL;
+}
+
+static void nonstale_queue_put_dedup(struct nonstale_queue *queue,
+ struct commit *c)
{
if (c->object.flags & ENQUEUED)
return;
c->object.flags |= ENQUEUED;
- prio_queue_put(queue, c);
+ nonstale_queue_put(queue, c);
}
-static struct commit *prio_queue_get_dedup(struct prio_queue *queue)
+static struct commit *nonstale_queue_get_dedup(struct nonstale_queue *queue)
{
- struct commit *commit = prio_queue_get(queue);
+ struct commit *commit = nonstale_queue_get(queue);
+
if (commit)
commit->object.flags &= ~ENQUEUED;
return commit;
}
-static int queue_has_nonstale(struct prio_queue *queue)
-{
- for (size_t i = 0; i < queue->nr; i++) {
- struct commit *commit = queue->array[i].data;
- if (!(commit->object.flags & STALE))
- return 1;
- }
- return 0;
-}
-
/* all input commits in one and twos[] must have been parsed! */
static int paint_down_to_common(struct repository *r,
struct commit *one, int n,
@@ -74,28 +104,30 @@ static int paint_down_to_common(struct repository *r,
enum merge_base_flags mb_flags,
struct commit_list **result)
{
- struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
+ struct nonstale_queue queue = {
+ { compare_commits_by_gen_then_commit_date }
+ };
int i;
timestamp_t last_gen = GENERATION_NUMBER_INFINITY;
struct commit_list **tail = result;
if (!min_generation && !corrected_commit_dates_enabled(r))
- queue.compare = compare_commits_by_commit_date;
+ queue.pq.compare = compare_commits_by_commit_date;
one->object.flags |= PARENT1;
if (!n) {
commit_list_append(one, result);
return 0;
}
- prio_queue_put_dedup(&queue, one);
+ nonstale_queue_put_dedup(&queue, one);
for (i = 0; i < n; i++) {
twos[i]->object.flags |= PARENT2;
- prio_queue_put_dedup(&queue, twos[i]);
+ nonstale_queue_put_dedup(&queue, twos[i]);
}
- while (queue_has_nonstale(&queue)) {
- struct commit *commit = prio_queue_get_dedup(&queue);
+ while (queue.max_nonstale) {
+ struct commit *commit = nonstale_queue_get_dedup(&queue);
struct commit_list *parents;
int flags;
timestamp_t generation = commit_graph_generation(commit);
@@ -133,7 +165,7 @@ static int paint_down_to_common(struct repository *r,
if ((p->object.flags & flags) == flags)
continue;
if (repo_parse_commit(r, p)) {
- clear_prio_queue(&queue);
+ clear_nonstale_queue(&queue);
commit_list_free(*result);
*result = NULL;
/*
@@ -149,11 +181,11 @@ static int paint_down_to_common(struct repository *r,
oid_to_hex(&p->object.oid));
}
p->object.flags |= flags;
- prio_queue_put_dedup(&queue, p);
+ nonstale_queue_put_dedup(&queue, p);
}
}
- clear_prio_queue(&queue);
+ clear_nonstale_queue(&queue);
commit_list_sort_by_date(result);
return 0;
}
@@ -1057,11 +1089,11 @@ struct commit_list *get_reachable_subset(struct commit **from, size_t nr_from,
define_commit_slab(bit_arrays, struct bitmap *);
static struct bit_arrays bit_arrays;
-static void insert_no_dup(struct prio_queue *queue, struct commit *c)
+static void insert_no_dup(struct nonstale_queue *queue, struct commit *c)
{
if (c->object.flags & PARENT2)
return;
- prio_queue_put(queue, c);
+ nonstale_queue_put(queue, c);
c->object.flags |= PARENT2;
}
@@ -1086,7 +1118,9 @@ void ahead_behind(struct repository *r,
struct commit **commits, size_t commits_nr,
struct ahead_behind_count *counts, size_t counts_nr)
{
- struct prio_queue queue = { .compare = compare_commits_by_gen_then_commit_date };
+ struct nonstale_queue queue = {
+ { .compare = compare_commits_by_gen_then_commit_date }
+ };
size_t width = DIV_ROUND_UP(commits_nr, BITS_IN_EWORD);
if (!commits_nr || !counts_nr)
@@ -1109,8 +1143,8 @@ void ahead_behind(struct repository *r,
insert_no_dup(&queue, c);
}
- while (queue_has_nonstale(&queue)) {
- struct commit *c = prio_queue_get(&queue);
+ while (queue.max_nonstale) {
+ struct commit *c = nonstale_queue_get(&queue);
struct commit_list *p;
struct bitmap *bitmap_c = get_bit_array(c, width);
@@ -1152,10 +1186,10 @@ void ahead_behind(struct repository *r,
/* STALE is used here, PARENT2 is used by insert_no_dup(). */
repo_clear_commit_marks(r, PARENT2 | STALE);
- for (size_t i = 0; i < queue.nr; i++)
- free_bit_array(queue.array[i].data);
+ for (size_t i = 0; i < queue.pq.nr; i++)
+ free_bit_array(queue.pq.array[i].data);
clear_bit_arrays(&bit_arrays);
- clear_prio_queue(&queue);
+ clear_nonstale_queue(&queue);
}
struct commit_and_index {
--
gitgitgadget
prev parent reply other threads:[~2026-05-25 14:28 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-24 17:42 [PATCH 0/3] commit-reach: replace queue_has_nonstale with a counter Kristofer Karlsson via GitGitGadget
2026-05-24 17:42 ` [PATCH 1/3] commit-reach: deduplicate queue entries in paint_down_to_common Kristofer Karlsson via GitGitGadget
2026-05-24 23:40 ` Junio C Hamano
2026-05-25 1:43 ` Derrick Stolee
2026-05-25 6:50 ` Kristofer Karlsson
2026-05-25 7:17 ` Junio C Hamano
2026-05-25 7:53 ` Kristofer Karlsson
2026-05-25 10:02 ` Jeff King
2026-05-25 7:01 ` Jeff King
2026-05-25 7:15 ` Jeff King
2026-05-24 17:42 ` [PATCH 2/3] commit-reach: optimize queue scan " Kristofer Karlsson via GitGitGadget
2026-05-25 1:59 ` Derrick Stolee
2026-05-25 8:54 ` Kristofer Karlsson
2026-05-24 17:42 ` [PATCH 3/3] commit-reach: optimize queue scan in ahead_behind Kristofer Karlsson via GitGitGadget
2026-05-25 7:11 ` Jeff King
2026-05-25 6:47 ` [PATCH 0/3] commit-reach: replace queue_has_nonstale with a counter Jeff King
2026-05-25 7:59 ` Kristofer Karlsson
2026-05-25 8:38 ` Junio C Hamano
2026-05-25 9:55 ` Jeff King
2026-05-25 10:47 ` Kristofer Karlsson
2026-05-25 14:28 ` [PATCH v2 0/3] commit-reach: replace queue_has_nonstale() scan with O(1) tracking Kristofer Karlsson via GitGitGadget
2026-05-25 14:28 ` [PATCH v2 1/3] object.h: fix stale entries in object flag allocation table Kristofer Karlsson via GitGitGadget
2026-05-25 14:28 ` [PATCH v2 2/3] commit-reach: deduplicate queue entries in paint_down_to_common Kristofer Karlsson via GitGitGadget
2026-05-25 14:28 ` Kristofer Karlsson via GitGitGadget [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=03771eb34c3ef1a896f5a63b4247b0a79f1589bb.1779719286.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=git@vger.kernel.org \
--cc=krka@spotify.com \
--cc=peff@peff.net \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox