From: Patrick Steinhardt <ps@pks.im>
To: git@vger.kernel.org
Subject: [PATCH 04/12] reftable/merged: make subiters own their records
Date: Wed, 14 Feb 2024 08:45:54 +0100 [thread overview]
Message-ID: <2c51c60d165f5bb4e998c8b5dbb33be72e54d8a8.1707895758.git.ps@pks.im> (raw)
In-Reply-To: <cover.1707895758.git.ps@pks.im>
[-- Attachment #1: Type: text/plain, Size: 9250 bytes --]
For each subiterator, the merged table needs to track their current
record. This record is owned by the priority queue though instead of by
the merged iterator. This is not optimal performance-wise.
For one, we need to move around records whenever we add or remove a
record from the priority queue. Thus, the bigger the entries the more
bytes we need to copy around. And compared to pointers, a reftable
record is rather on the bigger side. The other issue is that this makes
it harder to reuse the records.
Refactor the code so that the merged iterator tracks ownership of the
records per-subiter. Instead of having records in the priority queue, we
can now use mere pointers to the per-subiter records. This also allows
us to swap records between the caller and the per-subiter record instead
of doing an actual copy via `reftable_record_copy_from()`, which removes
the need to release the caller-provided record.
This results in a noticeable speedup when iterating through many refs.
The following benchmark iterates through 1 million refs:
Benchmark 1: show-ref: single matching ref (revision = HEAD~)
Time (mean ± σ): 145.5 ms ± 4.5 ms [User: 142.5 ms, System: 2.8 ms]
Range (min … max): 141.3 ms … 177.0 ms 1000 runs
Benchmark 2: show-ref: single matching ref (revision = HEAD)
Time (mean ± σ): 139.0 ms ± 4.7 ms [User: 136.1 ms, System: 2.8 ms]
Range (min … max): 134.2 ms … 182.2 ms 1000 runs
Summary
show-ref: single matching ref (revision = HEAD) ran
1.05 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)
This refactoring also allows a subsequent refactoring where we start
reusing memory allocated by the reftable records because we do not need
to release the caller-provided record anymore.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/merged.c | 54 ++++++++++++++++++++++++----------------------
reftable/pq.c | 8 ++-----
reftable/pq.h | 2 +-
reftable/pq_test.c | 41 ++++++++++++++++-------------------
4 files changed, 49 insertions(+), 56 deletions(-)
diff --git a/reftable/merged.c b/reftable/merged.c
index 9b1ccfff00..ae74234472 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -17,8 +17,13 @@ license that can be found in the LICENSE file or at
#include "reftable-error.h"
#include "system.h"
+struct merged_subiter {
+ struct reftable_iterator iter;
+ struct reftable_record rec;
+};
+
struct merged_iter {
- struct reftable_iterator *stack;
+ struct merged_subiter *subiters;
struct merged_iter_pqueue pq;
uint32_t hash_id;
size_t stack_len;
@@ -32,16 +37,18 @@ static int merged_iter_init(struct merged_iter *mi)
for (size_t i = 0; i < mi->stack_len; i++) {
struct pq_entry e = {
.index = i,
+ .rec = &mi->subiters[i].rec,
};
int err;
- reftable_record_init(&e.rec, mi->typ);
- err = iterator_next(&mi->stack[i], &e.rec);
+ reftable_record_init(&mi->subiters[i].rec, mi->typ);
+ err = iterator_next(&mi->subiters[i].iter,
+ &mi->subiters[i].rec);
if (err < 0)
return err;
if (err > 0) {
- reftable_iterator_destroy(&mi->stack[i]);
- reftable_record_release(&e.rec);
+ reftable_iterator_destroy(&mi->subiters[i].iter);
+ reftable_record_release(&mi->subiters[i].rec);
continue;
}
@@ -56,9 +63,11 @@ static void merged_iter_close(void *p)
struct merged_iter *mi = p;
merged_iter_pqueue_release(&mi->pq);
- for (size_t i = 0; i < mi->stack_len; i++)
- reftable_iterator_destroy(&mi->stack[i]);
- reftable_free(mi->stack);
+ for (size_t i = 0; i < mi->stack_len; i++) {
+ reftable_iterator_destroy(&mi->subiters[i].iter);
+ reftable_record_release(&mi->subiters[i].rec);
+ }
+ reftable_free(mi->subiters);
}
static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
@@ -66,17 +75,16 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
{
struct pq_entry e = {
.index = idx,
+ .rec = &mi->subiters[idx].rec,
};
int err;
- reftable_record_init(&e.rec, mi->typ);
- err = iterator_next(&mi->stack[idx], &e.rec);
+ err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
if (err < 0)
return err;
-
if (err > 0) {
- reftable_iterator_destroy(&mi->stack[idx]);
- reftable_record_release(&e.rec);
+ reftable_iterator_destroy(&mi->subiters[idx].iter);
+ reftable_record_release(&mi->subiters[idx].rec);
return 0;
}
@@ -86,7 +94,7 @@ static int merged_iter_advance_nonnull_subiter(struct merged_iter *mi,
static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
{
- if (iterator_is_null(&mi->stack[idx]))
+ if (iterator_is_null(&mi->subiters[idx].iter))
return 0;
return merged_iter_advance_nonnull_subiter(mi, idx);
}
@@ -121,25 +129,19 @@ static int merged_iter_next_entry(struct merged_iter *mi,
struct pq_entry top = merged_iter_pqueue_top(mi->pq);
int cmp;
- cmp = reftable_record_cmp(&top.rec, &entry.rec);
+ cmp = reftable_record_cmp(top.rec, entry.rec);
if (cmp > 0)
break;
merged_iter_pqueue_remove(&mi->pq);
err = merged_iter_advance_subiter(mi, top.index);
if (err < 0)
- goto done;
- reftable_record_release(&top.rec);
+ return err;
}
- reftable_record_release(rec);
- *rec = entry.rec;
mi->advance_index = entry.index;
-
-done:
- if (err)
- reftable_record_release(&entry.rec);
- return err;
+ SWAP(*rec, *entry.rec);
+ return 0;
}
static int merged_iter_next(struct merged_iter *mi, struct reftable_record *rec)
@@ -257,10 +259,10 @@ static int merged_table_seek_record(struct reftable_merged_table *mt,
struct merged_iter *p;
int err;
- REFTABLE_CALLOC_ARRAY(merged.stack, mt->stack_len);
+ REFTABLE_CALLOC_ARRAY(merged.subiters, mt->stack_len);
for (size_t i = 0; i < mt->stack_len; i++) {
err = reftable_table_seek_record(&mt->stack[i],
- &merged.stack[merged.stack_len], rec);
+ &merged.subiters[merged.stack_len].iter, rec);
if (err < 0)
goto out;
if (!err)
diff --git a/reftable/pq.c b/reftable/pq.c
index e0ccce2b97..0074d6bc43 100644
--- a/reftable/pq.c
+++ b/reftable/pq.c
@@ -14,7 +14,7 @@ license that can be found in the LICENSE file or at
int pq_less(struct pq_entry *a, struct pq_entry *b)
{
- int cmp = reftable_record_cmp(&a->rec, &b->rec);
+ int cmp = reftable_record_cmp(a->rec, b->rec);
if (cmp == 0)
return a->index > b->index;
return cmp < 0;
@@ -82,10 +82,6 @@ void merged_iter_pqueue_add(struct merged_iter_pqueue *pq, const struct pq_entry
void merged_iter_pqueue_release(struct merged_iter_pqueue *pq)
{
- int i = 0;
- for (i = 0; i < pq->len; i++) {
- reftable_record_release(&pq->heap[i].rec);
- }
FREE_AND_NULL(pq->heap);
- pq->len = pq->cap = 0;
+ memset(pq, 0, sizeof(*pq));
}
diff --git a/reftable/pq.h b/reftable/pq.h
index 9e25a43a36..ce23972c16 100644
--- a/reftable/pq.h
+++ b/reftable/pq.h
@@ -13,7 +13,7 @@ license that can be found in the LICENSE file or at
struct pq_entry {
size_t index;
- struct reftable_record rec;
+ struct reftable_record *rec;
};
struct merged_iter_pqueue {
diff --git a/reftable/pq_test.c b/reftable/pq_test.c
index c202eff848..b7d3c80cc7 100644
--- a/reftable/pq_test.c
+++ b/reftable/pq_test.c
@@ -27,48 +27,43 @@ void merged_iter_pqueue_check(struct merged_iter_pqueue pq)
static void test_pq(void)
{
- char *names[54] = { NULL };
- int N = ARRAY_SIZE(names) - 1;
-
struct merged_iter_pqueue pq = { NULL };
+ struct reftable_record recs[54];
+ int N = ARRAY_SIZE(recs) - 1, i;
char *last = NULL;
- int i = 0;
for (i = 0; i < N; i++) {
- char name[100];
- snprintf(name, sizeof(name), "%02d", i);
- names[i] = xstrdup(name);
+ struct strbuf refname = STRBUF_INIT;
+ strbuf_addf(&refname, "%02d", i);
+
+ reftable_record_init(&recs[i], BLOCK_TYPE_REF);
+ recs[i].u.ref.refname = strbuf_detach(&refname, NULL);
}
i = 1;
do {
- struct pq_entry e = { .rec = { .type = BLOCK_TYPE_REF,
- .u.ref = {
- .refname = names[i],
- } } };
+ struct pq_entry e = {
+ .rec = &recs[i],
+ };
+
merged_iter_pqueue_add(&pq, &e);
merged_iter_pqueue_check(pq);
+
i = (i * 7) % N;
} while (i != 1);
while (!merged_iter_pqueue_is_empty(pq)) {
struct pq_entry e = merged_iter_pqueue_remove(&pq);
- struct reftable_record *rec = &e.rec;
merged_iter_pqueue_check(pq);
- EXPECT(reftable_record_type(rec) == BLOCK_TYPE_REF);
- if (last) {
- EXPECT(strcmp(last, rec->u.ref.refname) < 0);
- }
- /* this is names[i], so don't dealloc. */
- last = rec->u.ref.refname;
- rec->u.ref.refname = NULL;
- reftable_record_release(rec);
- }
- for (i = 0; i < N; i++) {
- reftable_free(names[i]);
+ EXPECT(reftable_record_type(e.rec) == BLOCK_TYPE_REF);
+ if (last)
+ EXPECT(strcmp(last, e.rec->u.ref.refname) < 0);
+ last = e.rec->u.ref.refname;
}
+ for (i = 0; i < N; i++)
+ reftable_record_release(&recs[i]);
merged_iter_pqueue_release(&pq);
}
--
2.43.GIT
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
next prev parent reply other threads:[~2024-02-14 7:45 UTC|newest]
Thread overview: 51+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-14 7:45 [PATCH 00/12] reftable: improve ref iteration performance (pt.2) Patrick Steinhardt
2024-02-14 7:45 ` [PATCH 01/12] reftable/pq: use `size_t` to track iterator index Patrick Steinhardt
2024-02-14 7:45 ` [PATCH 02/12] reftable/merged: make `merged_iter` structure private Patrick Steinhardt
2024-02-20 18:15 ` Justin Tobler
2024-02-27 16:49 ` Patrick Steinhardt
2024-02-14 7:45 ` [PATCH 03/12] reftable/merged: advance subiter on subsequent iteration Patrick Steinhardt
2024-02-20 18:25 ` Justin Tobler
2024-02-27 16:50 ` Patrick Steinhardt
2024-02-14 7:45 ` Patrick Steinhardt [this message]
2024-02-14 7:45 ` [PATCH 05/12] reftable/merged: remove unnecessary null check for subiters Patrick Steinhardt
2024-02-14 7:46 ` [PATCH 06/12] reftable/merged: handle subiter cleanup on close only Patrick Steinhardt
2024-02-14 7:46 ` [PATCH 07/12] reftable/merged: circumvent pqueue with single subiter Patrick Steinhardt
2024-02-14 7:46 ` [PATCH 08/12] reftable/merged: avoid duplicate pqueue emptiness check Patrick Steinhardt
2024-02-27 23:53 ` James Liu
2024-02-14 7:46 ` [PATCH 09/12] reftable/record: reuse refname when decoding Patrick Steinhardt
2024-02-28 0:06 ` James Liu
2024-03-04 10:39 ` Patrick Steinhardt
2024-02-14 7:46 ` [PATCH 10/12] reftable/record: reuse refname when copying Patrick Steinhardt
2024-02-28 0:08 ` James Liu
2024-02-14 7:46 ` [PATCH 11/12] reftable/record: decode keys in place Patrick Steinhardt
2024-02-28 0:13 ` James Liu
2024-03-04 10:39 ` Patrick Steinhardt
2024-02-14 7:46 ` [PATCH 12/12] reftable: allow inlining of a few functions Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 00/13] reftable: improve ref iteration performance (pt.2) Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 01/13] reftable/pq: use `size_t` to track iterator index Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 02/13] reftable/merged: make `merged_iter` structure private Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 03/13] reftable/merged: advance subiter on subsequent iteration Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 04/13] reftable/merged: make subiters own their records Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 05/13] reftable/merged: remove unnecessary null check for subiters Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 06/13] reftable/merged: handle subiter cleanup on close only Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 07/13] reftable/merged: circumvent pqueue with single subiter Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 08/13] reftable/merged: avoid duplicate pqueue emptiness check Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 09/13] reftable/record: reuse refname when decoding Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 10/13] reftable/record: reuse refname when copying Patrick Steinhardt
2024-02-27 15:06 ` [PATCH v2 11/13] reftable/record: decode keys in place Patrick Steinhardt
2024-02-27 15:07 ` [PATCH v2 12/13] reftable: allow inlining of a few functions Patrick Steinhardt
2024-02-27 15:07 ` [PATCH v2 13/13] refs/reftable: precompute prefix length Patrick Steinhardt
2024-03-04 10:48 ` [PATCH v3 00/13] reftable: improve ref iteration performance (pt.2) Patrick Steinhardt
2024-03-04 10:48 ` [PATCH v3 01/13] reftable/pq: use `size_t` to track iterator index Patrick Steinhardt
2024-03-04 10:48 ` [PATCH v3 02/13] reftable/merged: make `merged_iter` structure private Patrick Steinhardt
2024-03-04 10:48 ` [PATCH v3 03/13] reftable/merged: advance subiter on subsequent iteration Patrick Steinhardt
2024-03-04 10:48 ` [PATCH v3 04/13] reftable/merged: make subiters own their records Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 05/13] reftable/merged: remove unnecessary null check for subiters Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 06/13] reftable/merged: handle subiter cleanup on close only Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 07/13] reftable/merged: circumvent pqueue with single subiter Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 08/13] reftable/merged: avoid duplicate pqueue emptiness check Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 09/13] reftable/record: reuse refname when decoding Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 10/13] reftable/record: reuse refname when copying Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 11/13] reftable/record: decode keys in place Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 12/13] reftable: allow inlining of a few functions Patrick Steinhardt
2024-03-04 10:49 ` [PATCH v3 13/13] refs/reftable: precompute prefix length Patrick Steinhardt
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2c51c60d165f5bb4e998c8b5dbb33be72e54d8a8.1707895758.git.ps@pks.im \
--to=ps@pks.im \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).