* [PATCH 01/10] refs: allow passing flags when setting up a transaction
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-11 10:30 ` karthik nayak
2024-11-08 9:34 ` [PATCH 02/10] refs/files: move logic to commit initial transaction Patrick Steinhardt
` (11 subsequent siblings)
12 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
Allow passing flags when setting up a transaction such that the
behaviour of the transaction itself can be altered. Adapt callers
accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
branch.c | 2 +-
builtin/clone.c | 2 +-
builtin/fast-import.c | 4 ++--
builtin/fetch.c | 4 ++--
builtin/receive-pack.c | 4 ++--
builtin/replace.c | 2 +-
builtin/tag.c | 2 +-
builtin/update-ref.c | 4 ++--
refs.c | 12 +++++++-----
refs.h | 3 ++-
refs/files-backend.c | 11 +++++++----
refs/refs-internal.h | 1 +
sequencer.c | 6 +++---
walker.c | 2 +-
14 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/branch.c b/branch.c
index 44977ad0aadbd40c878a0475ef2df2a20798936b..ebaa870c018747358255d3f150d136f0df447d5d 100644
--- a/branch.c
+++ b/branch.c
@@ -627,7 +627,7 @@ void create_branch(struct repository *r,
else
msg = xstrfmt("branch: Created from %s", start_name);
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf,
&oid, forcing ? NULL : null_oid(),
diff --git a/builtin/clone.c b/builtin/clone.c
index 59fcb317a68a77eee3ca96a60720c556e044c369..d963cc6eb5181e1af5bb29c07c3ee2fa24ad04ca 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs)
struct strbuf err = STRBUF_INIT;
t = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!t)
die("%s", err.buf);
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f141f42da988dddae0e549144d1379031..db82c37a06f05d25e0a8c34cbec055e6f9717191 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1634,7 +1634,7 @@ static int update_branch(struct branch *b)
}
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, b->name, &b->oid, &old_oid,
NULL, NULL, 0, msg, &err) ||
@@ -1669,7 +1669,7 @@ static void dump_tags(void)
struct ref_transaction *transaction;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
failure |= error("%s", err.buf);
goto cleanup;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index d9027e4dc9245a32a87c47d89f9a29fcfd42534c..68d291c7dd0a4fe805e3b006cc68b185481fba6b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -669,7 +669,7 @@ static int s_update_ref(const char *action,
*/
if (!transaction) {
transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
ret = STORE_REF_ERROR_OTHER;
goto out;
@@ -1671,7 +1671,7 @@ static int do_fetch(struct transport *transport,
if (atomic_fetch) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
retcode = -1;
goto cleanup;
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index ab5b20e39c5038cdf6e6f05f4d66278a9c1ac156..9d2c07f68dafb6fce87cb59abc1cbf27db9aae09 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1849,7 +1849,7 @@ static void execute_commands_non_atomic(struct command *commands,
continue;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
@@ -1878,7 +1878,7 @@ static void execute_commands_atomic(struct command *commands,
const char *reported_error = "atomic push failure";
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
diff --git a/builtin/replace.c b/builtin/replace.c
index a44f4e7ea9ff55dbd1f102aca5e5f5a046391328..a4eaadff91f1be107a8e0e25a701d2006ff8cac2 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -201,7 +201,7 @@ static int replace_object_oid(const char *object_ref,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf, repl, &prev,
NULL, NULL, 0, NULL, &err) ||
diff --git a/builtin/tag.c b/builtin/tag.c
index 93d10d59157d2ee1b41f90640bd162917f1eb162..3fa0ab111344dda477763a74d26076b6fb71a5ab 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -681,7 +681,7 @@ int cmd_tag(int argc,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf, &object, &prev,
NULL, NULL,
diff --git a/builtin/update-ref.c b/builtin/update-ref.c
index 8a98615dc8613a1be3b17c6d688ab9c0208ed003..670e7812d60ea88a20cd1c5cd113643095cb3be1 100644
--- a/builtin/update-ref.c
+++ b/builtin/update-ref.c
@@ -612,7 +612,7 @@ static void update_refs_stdin(void)
int i, j;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction)
die("%s", err.buf);
@@ -680,7 +680,7 @@ static void update_refs_stdin(void)
*/
state = cmd->state;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction)
die("%s", err.buf);
diff --git a/refs.c b/refs.c
index 5f729ed4124f7fe8fa9df7fd1f1ce951abefc585..9effeb01eb45728514eab0ca92404ea8cf2158d9 100644
--- a/refs.c
+++ b/refs.c
@@ -918,7 +918,7 @@ int refs_delete_ref(struct ref_store *refs, const char *msg,
struct ref_transaction *transaction;
struct strbuf err = STRBUF_INIT;
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction ||
ref_transaction_delete(transaction, refname, old_oid,
NULL, flags, msg, &err) ||
@@ -1116,6 +1116,7 @@ int read_ref_at(struct ref_store *refs, const char *refname,
}
struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
+ unsigned int flags,
struct strbuf *err)
{
struct ref_transaction *tr;
@@ -1123,6 +1124,7 @@ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
CALLOC_ARRAY(tr, 1);
tr->ref_store = refs;
+ tr->flags = flags;
return tr;
}
@@ -1309,7 +1311,7 @@ int refs_update_ref(struct ref_store *refs, const char *msg,
struct strbuf err = STRBUF_INIT;
int ret = 0;
- t = ref_store_transaction_begin(refs, &err);
+ t = ref_store_transaction_begin(refs, 0, &err);
if (!t ||
ref_transaction_update(t, refname, new_oid, old_oid, NULL, NULL,
flags, msg, &err) ||
@@ -2120,7 +2122,7 @@ int refs_update_symref(struct ref_store *refs, const char *ref,
struct strbuf err = STRBUF_INIT;
int ret = 0;
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref, NULL, NULL,
target, NULL, REF_NO_DEREF,
@@ -2527,7 +2529,7 @@ int refs_delete_refs(struct ref_store *refs, const char *logmsg,
* individual updates can't fail, so we can pack all of the
* updates into a single transaction.
*/
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction) {
ret = error("%s", err.buf);
goto out;
@@ -2833,7 +2835,7 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- transaction = ref_store_transaction_begin(new_refs, errbuf);
+ transaction = ref_store_transaction_begin(new_refs, 0, errbuf);
if (!transaction)
goto done;
diff --git a/refs.h b/refs.h
index 108dfc93b3428db491916ad8a55daea649d83ffd..9821f3e80d900b31c3dede489c2f415d968233d7 100644
--- a/refs.h
+++ b/refs.h
@@ -234,7 +234,7 @@ char *repo_default_branch_name(struct repository *r, int quiet);
* struct strbuf err = STRBUF_INIT;
* int ret = 0;
*
- * transaction = ref_store_transaction_begin(refs, &err);
+ * transaction = ref_store_transaction_begin(refs, 0, &err);
* if (!transaction ||
* ref_transaction_update(...) ||
* ref_transaction_create(...) ||
@@ -584,6 +584,7 @@ enum action_on_err {
* be freed by calling ref_transaction_free().
*/
struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
+ unsigned int flags,
struct strbuf *err);
/*
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 0824c0b8a946909791da36ddb4171db4ad98913b..df61057c9f24972b72644407cc95057891338d96 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1252,7 +1252,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r)
if (check_refname_format(r->name, 0))
return;
- transaction = ref_store_transaction_begin(&refs->base, &err);
+ transaction = ref_store_transaction_begin(&refs->base, 0, &err);
if (!transaction)
goto cleanup;
ref_transaction_add_update(
@@ -1396,7 +1396,8 @@ static int files_pack_refs(struct ref_store *ref_store,
if (!should_pack_refs(refs, opts))
return 0;
- transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
+ transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ 0, &err);
if (!transaction)
return -1;
@@ -2867,7 +2868,8 @@ static int files_transaction_prepare(struct ref_store *ref_store,
*/
if (!packed_transaction) {
packed_transaction = ref_store_transaction_begin(
- refs->packed_ref_store, err);
+ refs->packed_ref_store,
+ transaction->flags, err);
if (!packed_transaction) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
@@ -3174,7 +3176,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
&affected_refnames))
BUG("initial ref transaction called with existing refs");
- packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, err);
+ packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ transaction->flags, err);
if (!packed_transaction) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index 2313c830d8facaa17b0b4b073df0de958023062a..dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -193,6 +193,7 @@ struct ref_transaction {
size_t nr;
enum ref_transaction_state state;
void *backend_data;
+ unsigned int flags;
};
/*
diff --git a/sequencer.c b/sequencer.c
index 353d804999b88d5fd5dcf1254d80781f20e62f2e..287f4e5e8766327da6f31e87ce0c20f63b302b77 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -662,7 +662,7 @@ static int fast_forward_to(struct repository *r,
strbuf_addf(&sb, "%s: fast-forward", action_name(opts));
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, "HEAD",
to, unborn && !is_rebase_i(opts) ?
@@ -1297,7 +1297,7 @@ int update_head_with_reflog(const struct commit *old_head,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- err);
+ 0, err);
if (!transaction ||
ref_transaction_update(transaction, "HEAD", new_head,
old_head ? &old_head->object.oid : null_oid(),
@@ -3890,7 +3890,7 @@ static int do_label(struct repository *r, const char *name, int len)
strbuf_addf(&ref_name, "refs/rewritten/%.*s", len, name);
strbuf_addf(&msg, "rebase (label) '%.*s'", len, name);
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction) {
error("%s", err.buf);
ret = -1;
diff --git a/walker.c b/walker.c
index 5ea7e5b392b2bd49f249a9acc8d7ce8779357e1b..7cc9dbea46d64d6bd3336025d640f284a6202157 100644
--- a/walker.c
+++ b/walker.c
@@ -290,7 +290,7 @@ int walker_fetch(struct walker *walker, int targets, char **target,
if (write_ref) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
error("%s", err.buf);
goto done;
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 01/10] refs: allow passing flags when setting up a transaction
2024-11-08 9:34 ` [PATCH 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
@ 2024-11-11 10:30 ` karthik nayak
2024-11-11 12:53 ` Patrick Steinhardt
0 siblings, 1 reply; 54+ messages in thread
From: karthik nayak @ 2024-11-11 10:30 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 344 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> Allow passing flags when setting up a transaction such that the
> behaviour of the transaction itself can be altered. Adapt callers
> accordingly.
>
Maybe it is self-explanatory with the upcoming patches, but it'd be nice
to know _why_ this change is being made.
The patch itself looks good.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 01/10] refs: allow passing flags when setting up a transaction
2024-11-11 10:30 ` karthik nayak
@ 2024-11-11 12:53 ` Patrick Steinhardt
0 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-11 12:53 UTC (permalink / raw)
To: karthik nayak; +Cc: git
On Mon, Nov 11, 2024 at 05:30:18AM -0500, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> > Allow passing flags when setting up a transaction such that the
> > behaviour of the transaction itself can be altered. Adapt callers
> > accordingly.
> >
>
> Maybe it is self-explanatory with the upcoming patches, but it'd be nice
> to know _why_ this change is being made.
>
> The patch itself looks good.
Explaining the exact use case here wouldn't make sense, as I'd basically
have to repeat the explanation given in the subsequent patch that wires
things up. But I can mention that this will be used in a later patch.
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 02/10] refs/files: move logic to commit initial transaction
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 03/10] refs: introduce "initial" transaction flag Patrick Steinhardt
` (10 subsequent siblings)
12 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
Move the logic to commit initial transactions such that we can start to
call it in `files_transaction_finish()` in a subsequent commit without
requiring a separate function declaration.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs/files-backend.c | 202 +++++++++++++++++++++++++--------------------------
1 file changed, 101 insertions(+), 101 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index df61057c9f24972b72644407cc95057891338d96..f37c805a34167b3749fbe724788180975abdae90 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2975,6 +2975,107 @@ static int parse_and_write_reflog(struct files_ref_store *refs,
return 0;
}
+static int ref_present(const char *refname, const char *referent UNUSED,
+ const struct object_id *oid UNUSED,
+ int flags UNUSED,
+ void *cb_data)
+{
+ struct string_list *affected_refnames = cb_data;
+
+ return string_list_has_string(affected_refnames, refname);
+}
+
+static int files_initial_transaction_commit(struct ref_store *ref_store,
+ struct ref_transaction *transaction,
+ struct strbuf *err)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_WRITE,
+ "initial_ref_transaction_commit");
+ size_t i;
+ int ret = 0;
+ struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
+ struct ref_transaction *packed_transaction = NULL;
+
+ assert(err);
+
+ if (transaction->state != REF_TRANSACTION_OPEN)
+ BUG("commit called for transaction that is not open");
+
+ /* Fail if a refname appears more than once in the transaction: */
+ for (i = 0; i < transaction->nr; i++)
+ string_list_append(&affected_refnames,
+ transaction->updates[i]->refname);
+ string_list_sort(&affected_refnames);
+ if (ref_update_reject_duplicates(&affected_refnames, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ /*
+ * It's really undefined to call this function in an active
+ * repository or when there are existing references: we are
+ * only locking and changing packed-refs, so (1) any
+ * simultaneous processes might try to change a reference at
+ * the same time we do, and (2) any existing loose versions of
+ * the references that we are setting would have precedence
+ * over our values. But some remote helpers create the remote
+ * "HEAD" and "master" branches before calling this function,
+ * so here we really only check that none of the references
+ * that we are creating already exists.
+ */
+ if (refs_for_each_rawref(&refs->base, ref_present,
+ &affected_refnames))
+ BUG("initial ref transaction called with existing refs");
+
+ packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ transaction->flags, err);
+ if (!packed_transaction) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ for (i = 0; i < transaction->nr; i++) {
+ struct ref_update *update = transaction->updates[i];
+
+ if ((update->flags & REF_HAVE_OLD) &&
+ !is_null_oid(&update->old_oid))
+ BUG("initial ref transaction with old_sha1 set");
+ if (refs_verify_refname_available(&refs->base, update->refname,
+ &affected_refnames, NULL,
+ err)) {
+ ret = TRANSACTION_NAME_CONFLICT;
+ goto cleanup;
+ }
+
+ /*
+ * Add a reference creation for this reference to the
+ * packed-refs transaction:
+ */
+ ref_transaction_add_update(packed_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ &update->new_oid, &update->old_oid,
+ NULL, NULL, NULL);
+ }
+
+ if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ if (initial_ref_transaction_commit(packed_transaction, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ }
+
+ packed_refs_unlock(refs->packed_ref_store);
+cleanup:
+ if (packed_transaction)
+ ref_transaction_free(packed_transaction);
+ transaction->state = REF_TRANSACTION_CLOSED;
+ string_list_clear(&affected_refnames, 0);
+ return ret;
+}
+
static int files_transaction_finish(struct ref_store *ref_store,
struct ref_transaction *transaction,
struct strbuf *err)
@@ -3123,107 +3224,6 @@ static int files_transaction_abort(struct ref_store *ref_store,
return 0;
}
-static int ref_present(const char *refname, const char *referent UNUSED,
- const struct object_id *oid UNUSED,
- int flags UNUSED,
- void *cb_data)
-{
- struct string_list *affected_refnames = cb_data;
-
- return string_list_has_string(affected_refnames, refname);
-}
-
-static int files_initial_transaction_commit(struct ref_store *ref_store,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct files_ref_store *refs =
- files_downcast(ref_store, REF_STORE_WRITE,
- "initial_ref_transaction_commit");
- size_t i;
- int ret = 0;
- struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
- struct ref_transaction *packed_transaction = NULL;
-
- assert(err);
-
- if (transaction->state != REF_TRANSACTION_OPEN)
- BUG("commit called for transaction that is not open");
-
- /* Fail if a refname appears more than once in the transaction: */
- for (i = 0; i < transaction->nr; i++)
- string_list_append(&affected_refnames,
- transaction->updates[i]->refname);
- string_list_sort(&affected_refnames);
- if (ref_update_reject_duplicates(&affected_refnames, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- /*
- * It's really undefined to call this function in an active
- * repository or when there are existing references: we are
- * only locking and changing packed-refs, so (1) any
- * simultaneous processes might try to change a reference at
- * the same time we do, and (2) any existing loose versions of
- * the references that we are setting would have precedence
- * over our values. But some remote helpers create the remote
- * "HEAD" and "master" branches before calling this function,
- * so here we really only check that none of the references
- * that we are creating already exists.
- */
- if (refs_for_each_rawref(&refs->base, ref_present,
- &affected_refnames))
- BUG("initial ref transaction called with existing refs");
-
- packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
- transaction->flags, err);
- if (!packed_transaction) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- for (i = 0; i < transaction->nr; i++) {
- struct ref_update *update = transaction->updates[i];
-
- if ((update->flags & REF_HAVE_OLD) &&
- !is_null_oid(&update->old_oid))
- BUG("initial ref transaction with old_sha1 set");
- if (refs_verify_refname_available(&refs->base, update->refname,
- &affected_refnames, NULL,
- err)) {
- ret = TRANSACTION_NAME_CONFLICT;
- goto cleanup;
- }
-
- /*
- * Add a reference creation for this reference to the
- * packed-refs transaction:
- */
- ref_transaction_add_update(packed_transaction, update->refname,
- update->flags & ~REF_HAVE_OLD,
- &update->new_oid, &update->old_oid,
- NULL, NULL, NULL);
- }
-
- if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- if (initial_ref_transaction_commit(packed_transaction, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- }
-
- packed_refs_unlock(refs->packed_ref_store);
-cleanup:
- if (packed_transaction)
- ref_transaction_free(packed_transaction);
- transaction->state = REF_TRANSACTION_CLOSED;
- string_list_clear(&affected_refnames, 0);
- return ret;
-}
-
struct expire_reflog_cb {
reflog_expiry_should_prune_fn *should_prune_fn;
void *policy_cb;
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 03/10] refs: introduce "initial" transaction flag
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 02/10] refs/files: move logic to commit initial transaction Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
` (9 subsequent siblings)
12 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
There are two different ways to commit a transaction:
- `ref_transaction_commit()` can be used to commit a regular
transaction and is what almost every caller wants.
- `initial_ref_transaction_commit()` can be used when it is known that
the ref store that the transaction is committed for is empty and
when there are no concurrent processes. This is used when cloning a
new repository.
Implementing this via two separate functions has a couple of downsides.
First, every reference backend needs to implement a separate callback
even in the case where they don't special-case the initial transaction.
Second, backends are basically forced to reimplement the whole logic for
how to commit the transaction like the "files" backend does, even though
backends may wish to only tweak certain behaviour of a "normal" commit.
Third, it is awkward that callers must never prepare the transaction as
this is somewhat different than how a transaction typically works.
Refactor the code such that we instead mark initial transactions via a
separate flag when starting the transaction. This addresses all of the
mentioned painpoints, where the most important part is that it will
allow backends to have way more leeway in how exactly they want to
handle the initial transaction.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/clone.c | 4 ++--
refs.c | 10 +---------
refs.h | 37 ++++++++++++++++++-------------------
refs/debug.c | 13 -------------
refs/files-backend.c | 16 ++++++++--------
refs/packed-backend.c | 8 --------
refs/refs-internal.h | 1 -
refs/reftable-backend.c | 8 --------
8 files changed, 29 insertions(+), 68 deletions(-)
diff --git a/builtin/clone.c b/builtin/clone.c
index d963cc6eb5181e1af5bb29c07c3ee2fa24ad04ca..8427ccec17e77b23ee1a7f526b80a52d79a71873 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs)
struct strbuf err = STRBUF_INIT;
t = ref_store_transaction_begin(get_main_ref_store(the_repository),
- 0, &err);
+ REF_TRANSACTION_FLAG_INITIAL, &err);
if (!t)
die("%s", err.buf);
@@ -586,7 +586,7 @@ static void write_remote_refs(const struct ref *local_refs)
die("%s", err.buf);
}
- if (initial_ref_transaction_commit(t, &err))
+ if (ref_transaction_commit(t, &err))
die("%s", err.buf);
strbuf_release(&err);
diff --git a/refs.c b/refs.c
index 9effeb01eb45728514eab0ca92404ea8cf2158d9..8b9dfc6173fd144fe9a172cb81cf33057ae31368 100644
--- a/refs.c
+++ b/refs.c
@@ -2315,7 +2315,7 @@ int ref_transaction_commit(struct ref_transaction *transaction,
}
ret = refs->be->transaction_finish(refs, transaction, err);
- if (!ret)
+ if (!ret && !(transaction->flags & REF_TRANSACTION_FLAG_INITIAL))
run_transaction_hook(transaction, "committed");
return ret;
}
@@ -2486,14 +2486,6 @@ int refs_reflog_expire(struct ref_store *refs,
cleanup_fn, policy_cb_data);
}
-int initial_ref_transaction_commit(struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct ref_store *refs = transaction->ref_store;
-
- return refs->be->initial_transaction_commit(refs, transaction, err);
-}
-
void ref_transaction_for_each_queued_update(struct ref_transaction *transaction,
ref_transaction_for_each_queued_update_fn cb,
void *cb_data)
diff --git a/refs.h b/refs.h
index 9821f3e80d900b31c3dede489c2f415d968233d7..024a370554e013d66febee635e4c0415ba061fe6 100644
--- a/refs.h
+++ b/refs.h
@@ -214,11 +214,9 @@ char *repo_default_branch_name(struct repository *r, int quiet);
*
* Or
*
- * - Call `initial_ref_transaction_commit()` if the ref database is
- * known to be empty and have no other writers (e.g. during
- * clone). This is likely to be much faster than
- * `ref_transaction_commit()`. `ref_transaction_prepare()` should
- * *not* be called before `initial_ref_transaction_commit()`.
+ * - Call `ref_transaction_begin()` with REF_TRANSACTION_FLAG_INITIAL if the
+ * ref database is known to be empty and have no other writers (e.g. during
+ * clone). This is likely to be much faster than without the flag.
*
* - Then finally, call `ref_transaction_free()` to free the
* `ref_transaction` data structure.
@@ -579,6 +577,21 @@ enum action_on_err {
UPDATE_REFS_QUIET_ON_ERR
};
+enum ref_transaction_flag {
+ /*
+ * The ref transaction is part of the initial creation of the ref store
+ * and can thus assume that the ref store is completely empty. This
+ * allows the backend to perform the transaction more efficiently by
+ * skipping certain checks.
+ *
+ * It is a bug to set this flag when there might be other processes
+ * accessing the repository or if there are existing references that
+ * might conflict with the ones being created. All old_oid values must
+ * either be absent or null_oid.
+ */
+ REF_TRANSACTION_FLAG_INITIAL = (1 << 0),
+};
+
/*
* Begin a reference transaction. The reference transaction must
* be freed by calling ref_transaction_free().
@@ -798,20 +811,6 @@ int ref_transaction_commit(struct ref_transaction *transaction,
int ref_transaction_abort(struct ref_transaction *transaction,
struct strbuf *err);
-/*
- * Like ref_transaction_commit(), but optimized for creating
- * references when originally initializing a repository (e.g., by "git
- * clone"). It writes the new references directly to packed-refs
- * without locking the individual references.
- *
- * It is a bug to call this function when there might be other
- * processes accessing the repository or if there are existing
- * references that might conflict with the ones being created. All
- * old_oid values must either be absent or null_oid.
- */
-int initial_ref_transaction_commit(struct ref_transaction *transaction,
- struct strbuf *err);
-
/*
* Execute the given callback function for each of the reference updates which
* have been queued in the given transaction. `old_oid` and `new_oid` may be
diff --git a/refs/debug.c b/refs/debug.c
index 45e2e784a0f8c49f492eaa9d371aa44f8c7916c3..cbac62c8a4f924580e80f106f87639daf77cef5c 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -118,18 +118,6 @@ static int debug_transaction_abort(struct ref_store *refs,
return res;
}
-static int debug_initial_transaction_commit(struct ref_store *refs,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct debug_ref_store *drefs = (struct debug_ref_store *)refs;
- int res;
- transaction->ref_store = drefs->refs;
- res = drefs->refs->be->initial_transaction_commit(drefs->refs,
- transaction, err);
- return res;
-}
-
static int debug_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts)
{
struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
@@ -443,7 +431,6 @@ struct ref_storage_be refs_be_debug = {
.transaction_prepare = debug_transaction_prepare,
.transaction_finish = debug_transaction_finish,
.transaction_abort = debug_transaction_abort,
- .initial_transaction_commit = debug_initial_transaction_commit,
.pack_refs = debug_pack_refs,
.rename_ref = debug_rename_ref,
diff --git a/refs/files-backend.c b/refs/files-backend.c
index f37c805a34167b3749fbe724788180975abdae90..3ed18475a72aa4798d15b2912c37b4caabd47aac 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2781,6 +2781,8 @@ static int files_transaction_prepare(struct ref_store *ref_store,
assert(err);
+ if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL)
+ goto cleanup;
if (!transaction->nr)
goto cleanup;
@@ -2985,13 +2987,10 @@ static int ref_present(const char *refname, const char *referent UNUSED,
return string_list_has_string(affected_refnames, refname);
}
-static int files_initial_transaction_commit(struct ref_store *ref_store,
+static int files_transaction_finish_initial(struct files_ref_store *refs,
struct ref_transaction *transaction,
struct strbuf *err)
{
- struct files_ref_store *refs =
- files_downcast(ref_store, REF_STORE_WRITE,
- "initial_ref_transaction_commit");
size_t i;
int ret = 0;
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
@@ -2999,8 +2998,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
assert(err);
- if (transaction->state != REF_TRANSACTION_OPEN)
- BUG("commit called for transaction that is not open");
+ if (transaction->state != REF_TRANSACTION_PREPARED)
+ BUG("commit called for transaction that is not prepared");
/* Fail if a refname appears more than once in the transaction: */
for (i = 0; i < transaction->nr; i++)
@@ -3063,7 +3062,7 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
goto cleanup;
}
- if (initial_ref_transaction_commit(packed_transaction, err)) {
+ if (ref_transaction_commit(packed_transaction, err)) {
ret = TRANSACTION_GENERIC_ERROR;
}
@@ -3091,6 +3090,8 @@ static int files_transaction_finish(struct ref_store *ref_store,
assert(err);
+ if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL)
+ return files_transaction_finish_initial(refs, transaction, err);
if (!transaction->nr) {
transaction->state = REF_TRANSACTION_CLOSED;
return 0;
@@ -3617,7 +3618,6 @@ struct ref_storage_be refs_be_files = {
.transaction_prepare = files_transaction_prepare,
.transaction_finish = files_transaction_finish,
.transaction_abort = files_transaction_abort,
- .initial_transaction_commit = files_initial_transaction_commit,
.pack_refs = files_pack_refs,
.rename_ref = files_rename_ref,
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 07c57fd541a5039d5fcb93d9bf78e1916f67b219..794471de60c78494f1f2b0e9de013422e3e7625d 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1730,13 +1730,6 @@ static int packed_transaction_finish(struct ref_store *ref_store,
return ret;
}
-static int packed_initial_transaction_commit(struct ref_store *ref_store UNUSED,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- return ref_transaction_commit(transaction, err);
-}
-
static int packed_pack_refs(struct ref_store *ref_store UNUSED,
struct pack_refs_opts *pack_opts UNUSED)
{
@@ -1769,7 +1762,6 @@ struct ref_storage_be refs_be_packed = {
.transaction_prepare = packed_transaction_prepare,
.transaction_finish = packed_transaction_finish,
.transaction_abort = packed_transaction_abort,
- .initial_transaction_commit = packed_initial_transaction_commit,
.pack_refs = packed_pack_refs,
.rename_ref = NULL,
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee..33335d54c9162755c70174093017439c0018f36d 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -666,7 +666,6 @@ struct ref_storage_be {
ref_transaction_prepare_fn *transaction_prepare;
ref_transaction_finish_fn *transaction_finish;
ref_transaction_abort_fn *transaction_abort;
- ref_transaction_commit_fn *initial_transaction_commit;
pack_refs_fn *pack_refs;
rename_ref_fn *rename_ref;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 38eb14d591ec0c7c221b6b0f7483e547748e7f1c..8e914afc817f198bed3199630b430e179cabc740 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1490,13 +1490,6 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED,
return ret;
}
-static int reftable_be_initial_transaction_commit(struct ref_store *ref_store UNUSED,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- return ref_transaction_commit(transaction, err);
-}
-
static int reftable_be_pack_refs(struct ref_store *ref_store,
struct pack_refs_opts *opts)
{
@@ -2490,7 +2483,6 @@ struct ref_storage_be refs_be_reftable = {
.transaction_prepare = reftable_be_transaction_prepare,
.transaction_finish = reftable_be_transaction_finish,
.transaction_abort = reftable_be_transaction_abort,
- .initial_transaction_commit = reftable_be_initial_transaction_commit,
.pack_refs = reftable_be_pack_refs,
.rename_ref = reftable_be_rename_ref,
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 04/10] refs/files: support symbolic and root refs in initial transaction
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (2 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 03/10] refs: introduce "initial" transaction flag Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-11 10:42 ` karthik nayak
2024-11-08 9:34 ` [PATCH 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
` (8 subsequent siblings)
12 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
The "files" backend has implemented special logic when committing
the first transactions in an otherwise empty ref store: instead of
writing all refs as separate loose files, it instead knows to write them
all into a "packed-refs" file directly. This is significantly more
efficient than having to write each of the refs as separate "loose" ref.
The only user of this optimization is git-clone(1), which only uses this
mechanism to write regular refs. Consequently, the implementation does
not know how to handle both symbolic and root refs. While fine in the
context of git-clone(1), this keeps us from using the mechanism in more
cases.
Adapt the logic to also support symbolic and root refs by using a second
transaction that we use for all of the refs that need to be written as
loose refs.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs/files-backend.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 3ed18475a72aa4798d15b2912c37b4caabd47aac..116d4259697b20583cb2db34ed47025e8781cd42 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2995,6 +2995,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
int ret = 0;
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
struct ref_transaction *packed_transaction = NULL;
+ struct ref_transaction *loose_transaction = NULL;
assert(err);
@@ -3040,6 +3041,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
if ((update->flags & REF_HAVE_OLD) &&
!is_null_oid(&update->old_oid))
BUG("initial ref transaction with old_sha1 set");
+
if (refs_verify_refname_available(&refs->base, update->refname,
&affected_refnames, NULL,
err)) {
@@ -3048,26 +3050,48 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
}
/*
- * Add a reference creation for this reference to the
- * packed-refs transaction:
+ * packed-refs don't support symbolic refs and root refs, so we
+ * have to queue these references via the loose transaction.
*/
- ref_transaction_add_update(packed_transaction, update->refname,
- update->flags & ~REF_HAVE_OLD,
- &update->new_oid, &update->old_oid,
- NULL, NULL, NULL);
+ if (update->new_target || is_root_ref(update->refname)) {
+ if (!loose_transaction) {
+ loose_transaction = ref_store_transaction_begin(&refs->base, 0, err);
+ if (!loose_transaction) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+ }
+
+ ref_transaction_add_update(loose_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ update->new_target ? NULL : &update->new_oid, NULL,
+ update->new_target, NULL, NULL);
+ } else {
+ ref_transaction_add_update(packed_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ &update->new_oid, &update->old_oid,
+ NULL, NULL, NULL);
+ }
}
- if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
+ if (packed_refs_lock(refs->packed_ref_store, 0, err) ||
+ ref_transaction_commit(packed_transaction, err)) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
}
+ packed_refs_unlock(refs->packed_ref_store);
- if (ref_transaction_commit(packed_transaction, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
+ if (loose_transaction) {
+ if (ref_transaction_prepare(loose_transaction, err) ||
+ ref_transaction_commit(loose_transaction, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
}
- packed_refs_unlock(refs->packed_ref_store);
cleanup:
+ if (loose_transaction)
+ ref_transaction_free(loose_transaction);
if (packed_transaction)
ref_transaction_free(packed_transaction);
transaction->state = REF_TRANSACTION_CLOSED;
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 04/10] refs/files: support symbolic and root refs in initial transaction
2024-11-08 9:34 ` [PATCH 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
@ 2024-11-11 10:42 ` karthik nayak
2024-11-11 12:53 ` Patrick Steinhardt
0 siblings, 1 reply; 54+ messages in thread
From: karthik nayak @ 2024-11-11 10:42 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 1184 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> The "files" backend has implemented special logic when committing
> the first transactions in an otherwise empty ref store: instead of
> writing all refs as separate loose files, it instead knows to write them
> all into a "packed-refs" file directly. This is significantly more
> efficient than having to write each of the refs as separate "loose" ref.
>
> The only user of this optimization is git-clone(1), which only uses this
> mechanism to write regular refs. Consequently, the implementation does
> not know how to handle both symbolic and root refs. While fine in the
> context of git-clone(1), this keeps us from using the mechanism in more
> cases.
>
> Adapt the logic to also support symbolic and root refs by using a second
> transaction that we use for all of the refs that need to be written as
> loose refs.
>
The patch looks good. I was wondering if another way would be to just
add symref and root ref support to packed-refs. But that might be a
bigger undertaking than what we're doing here.
But thinking about it, seems like we can do that in a backwards
compatible way too.
The changes in this patch look good.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 04/10] refs/files: support symbolic and root refs in initial transaction
2024-11-11 10:42 ` karthik nayak
@ 2024-11-11 12:53 ` Patrick Steinhardt
0 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-11 12:53 UTC (permalink / raw)
To: karthik nayak; +Cc: git
On Mon, Nov 11, 2024 at 02:42:11AM -0800, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> > The "files" backend has implemented special logic when committing
> > the first transactions in an otherwise empty ref store: instead of
> > writing all refs as separate loose files, it instead knows to write them
> > all into a "packed-refs" file directly. This is significantly more
> > efficient than having to write each of the refs as separate "loose" ref.
> >
> > The only user of this optimization is git-clone(1), which only uses this
> > mechanism to write regular refs. Consequently, the implementation does
> > not know how to handle both symbolic and root refs. While fine in the
> > context of git-clone(1), this keeps us from using the mechanism in more
> > cases.
> >
> > Adapt the logic to also support symbolic and root refs by using a second
> > transaction that we use for all of the refs that need to be written as
> > loose refs.
> >
>
> The patch looks good. I was wondering if another way would be to just
> add symref and root ref support to packed-refs. But that might be a
> bigger undertaking than what we're doing here.
>
> But thinking about it, seems like we can do that in a backwards
> compatible way too.
I don't think you can without introducing a new version of the format,
mostly because you also have to think about clients that don't support
the new format. So all of this would be a big undertaking, and given
that we have the reftable backend nowadays I don't think it'd be all
that valuable.
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 05/10] refs: use "initial" transaction semantics to migrate refs
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (3 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-11 10:43 ` karthik nayak
2024-11-08 9:34 ` [PATCH 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
` (7 subsequent siblings)
12 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
Until now, we couldn't use "initial" transaction semantics to migrate
refs because the "files" backend only supported writing regular refs via
the initial transaction because it simply mapped the transaction to a
"packed-refs" transaction. But with the preceding commit, the "files"
backend has learned to also write symbolic and root refs in the initial
transaction by creating a second transaction for all refs that need to
be written as loose refs.
Adapt the code to migrate refs to commit the transaction as an initial
transaction. This results in a signiticant speedup when migrating many
refs:
Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 3.247 s ± 0.034 s [User: 0.485 s, System: 2.722 s]
Range (min … max): 3.216 s … 3.309 s 10 runs
Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
Time (mean ± σ): 453.6 ms ± 1.9 ms [User: 214.6 ms, System: 230.5 ms]
Range (min … max): 451.5 ms … 456.4 ms 10 runs
Summary
migrate reftable:files (refcount = 100000, revision = HEAD) ran
7.16 ± 0.08 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
As the reftable backend doesn't (yet) special-case initial transactions
there is no comparable speedup for that backend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 10 ++--------
t/t1460-refs-migrate.sh | 2 +-
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/refs.c b/refs.c
index 8b9dfc6173fd144fe9a172cb81cf33057ae31368..0f10c565bbb4e0d91210c52a3221a224e4264d28 100644
--- a/refs.c
+++ b/refs.c
@@ -2827,7 +2827,8 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- transaction = ref_store_transaction_begin(new_refs, 0, errbuf);
+ transaction = ref_store_transaction_begin(new_refs, REF_TRANSACTION_FLAG_INITIAL,
+ errbuf);
if (!transaction)
goto done;
@@ -2852,13 +2853,6 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- /*
- * TODO: we might want to migrate to `initial_ref_transaction_commit()`
- * here, which is more efficient for the files backend because it would
- * write new refs into the packed-refs file directly. At this point,
- * the files backend doesn't handle pseudo-refs and symrefs correctly
- * though, so this requires some more work.
- */
ret = ref_transaction_commit(transaction, errbuf);
if (ret < 0)
goto done;
diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh
index f7c0783d30ccd61b0fee67c115193b42bb0e2c77..b90b38a87f7bb905afeeceb4f9a3bfc8b772e16a 100755
--- a/t/t1460-refs-migrate.sh
+++ b/t/t1460-refs-migrate.sh
@@ -237,7 +237,7 @@ test_expect_success 'migrating from reftable format deletes backend files' '
test_path_is_missing repo/.git/reftable &&
echo "ref: refs/heads/main" >expect &&
test_cmp expect repo/.git/HEAD &&
- test_path_is_file repo/.git/refs/heads/main
+ test_path_is_file repo/.git/packed-refs
'
test_done
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 05/10] refs: use "initial" transaction semantics to migrate refs
2024-11-08 9:34 ` [PATCH 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
@ 2024-11-11 10:43 ` karthik nayak
0 siblings, 0 replies; 54+ messages in thread
From: karthik nayak @ 2024-11-11 10:43 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 1578 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> Until now, we couldn't use "initial" transaction semantics to migrate
> refs because the "files" backend only supported writing regular refs via
> the initial transaction because it simply mapped the transaction to a
> "packed-refs" transaction. But with the preceding commit, the "files"
> backend has learned to also write symbolic and root refs in the initial
> transaction by creating a second transaction for all refs that need to
> be written as loose refs.
>
> Adapt the code to migrate refs to commit the transaction as an initial
> transaction. This results in a signiticant speedup when migrating many
> refs:
>
> Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
> Time (mean ± σ): 3.247 s ± 0.034 s [User: 0.485 s, System: 2.722 s]
> Range (min … max): 3.216 s … 3.309 s 10 runs
>
> Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
> Time (mean ± σ): 453.6 ms ± 1.9 ms [User: 214.6 ms, System: 230.5 ms]
> Range (min … max): 451.5 ms … 456.4 ms 10 runs
>
> Summary
> migrate reftable:files (refcount = 100000, revision = HEAD) ran
> 7.16 ± 0.08 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
>
> As the reftable backend doesn't (yet) special-case initial transactions
> there is no comparable speedup for that backend.
>
Nice. Really like how the actual perf changes were built up to a single
line change.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 06/10] refs: skip collision checks in initial transactions
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (4 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-11 10:53 ` karthik nayak
2024-11-08 9:34 ` [PATCH 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` Patrick Steinhardt
` (6 subsequent siblings)
12 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
Reference transactions use `refs_verify_refname_available()` to check
for colliding references. This check consists of two parts:
- Checks for whether multiple ref updates in the same transaction
conflict with each other.
- Checks for whether existing refs conflict with any refs part of the
transaction.
While we generally cannot avoid the first check, the second check is
superfluous in cases where the transaction is an initial one in an
otherwise empty ref store. The check results in multiple ref reads as
well as the creation of a ref iterator for every ref we're checking,
which adds up quite fast when performing the check for many refs.
Introduce a new flag that allows us to skip this check and wire it up in
such that the backends pass it when running an initial transaction. This
leads to significant speedups when migrating ref storage backends. From
"files" to "reftable":
Benchmark 1: migrate files:reftable (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 472.4 ms ± 6.7 ms [User: 175.9 ms, System: 285.2 ms]
Range (min … max): 463.5 ms … 483.2 ms 10 runs
Benchmark 2: migrate files:reftable (refcount = 100000, revision = HEAD)
Time (mean ± σ): 86.1 ms ± 1.9 ms [User: 67.9 ms, System: 16.0 ms]
Range (min … max): 82.9 ms … 90.9 ms 29 runs
Summary
migrate files:reftable (refcount = 100000, revision = HEAD) ran
5.48 ± 0.15 times faster than migrate files:reftable (refcount = 100000, revision = HEAD~)
And from "reftable" to "files":
Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 452.7 ms ± 3.4 ms [User: 209.9 ms, System: 235.4 ms]
Range (min … max): 445.9 ms … 457.5 ms 10 runs
Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
Time (mean ± σ): 95.2 ms ± 2.2 ms [User: 73.6 ms, System: 20.6 ms]
Range (min … max): 91.7 ms … 100.8 ms 28 runs
Summary
migrate reftable:files (refcount = 100000, revision = HEAD) ran
4.76 ± 0.11 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 37 +++++++++++++++++++++----------------
refs.h | 5 ++++-
refs/files-backend.c | 13 ++++++-------
refs/reftable-backend.c | 6 ++++--
t/helper/test-ref-store.c | 2 +-
5 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/refs.c b/refs.c
index 0f10c565bbb4e0d91210c52a3221a224e4264d28..d690eb19b3fd7083a5309deb98738547e4f48040 100644
--- a/refs.c
+++ b/refs.c
@@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
+ int initial_transaction,
struct strbuf *err)
{
const char *slash;
@@ -2332,8 +2333,6 @@ int refs_verify_refname_available(struct ref_store *refs,
struct strbuf referent = STRBUF_INIT;
struct object_id oid;
unsigned int type;
- struct ref_iterator *iter;
- int ok;
int ret = -1;
/*
@@ -2363,7 +2362,8 @@ int refs_verify_refname_available(struct ref_store *refs,
if (skip && string_list_has_string(skip, dirname.buf))
continue;
- if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
+ if (!initial_transaction &&
+ !refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
&type, &ignore_errno)) {
strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
dirname.buf, refname);
@@ -2388,21 +2388,26 @@ int refs_verify_refname_available(struct ref_store *refs,
strbuf_addstr(&dirname, refname + dirname.len);
strbuf_addch(&dirname, '/');
- iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
- DO_FOR_EACH_INCLUDE_BROKEN);
- while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
- if (skip &&
- string_list_has_string(skip, iter->refname))
- continue;
+ if (!initial_transaction) {
+ struct ref_iterator *iter;
+ int ok;
- strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
- iter->refname, refname);
- ref_iterator_abort(iter);
- goto cleanup;
- }
+ iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
+ DO_FOR_EACH_INCLUDE_BROKEN);
+ while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
+ if (skip &&
+ string_list_has_string(skip, iter->refname))
+ continue;
- if (ok != ITER_DONE)
- BUG("error while iterating over references");
+ strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
+ iter->refname, refname);
+ ref_iterator_abort(iter);
+ goto cleanup;
+ }
+
+ if (ok != ITER_DONE)
+ BUG("error while iterating over references");
+ }
extra_refname = find_descendant_ref(dirname.buf, extras, skip);
if (extra_refname)
diff --git a/refs.h b/refs.h
index 024a370554e013d66febee635e4c0415ba061fe6..980bd20cf24e15144aeff991eeba8b27a936d386 100644
--- a/refs.h
+++ b/refs.h
@@ -101,13 +101,16 @@ int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname,
* both "foo" and with "foo/bar/baz" but not with "foo/bar" or
* "foo/barbados".
*
+ * If `initial_transaction` is truish, then all collision checks with
+ * preexisting refs are skipped.
+ *
* extras and skip must be sorted.
*/
-
int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
+ int initial_transaction,
struct strbuf *err);
int refs_ref_exists(struct ref_store *refs, const char *refname);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 116d4259697b20583cb2db34ed47025e8781cd42..d27806c02c272f8bddc789b509e3c3c7af4f75aa 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -706,7 +706,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
* reason to expect this error to be transitory.
*/
if (refs_verify_refname_available(&refs->base, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
if (mustexist) {
/*
* To the user the relevant error is
@@ -813,7 +813,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
REMOVE_DIR_EMPTY_ONLY)) {
if (refs_verify_refname_available(
&refs->base, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
/*
* The error message set by
* verify_refname_available() is OK.
@@ -850,7 +850,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
*/
if (refs_verify_refname_available(
refs->packed_ref_store, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
ret = TRANSACTION_NAME_CONFLICT;
goto error_return;
}
@@ -1159,7 +1159,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
*/
if (is_null_oid(&lock->old_oid) &&
refs_verify_refname_available(refs->packed_ref_store, refname,
- NULL, NULL, err))
+ NULL, NULL, 0, err))
goto error_return;
lock->ref_name = xstrdup(refname);
@@ -1538,7 +1538,7 @@ static int refs_rename_ref_available(struct ref_store *refs,
string_list_insert(&skip, old_refname);
ok = !refs_verify_refname_available(refs, new_refname,
- NULL, &skip, &err);
+ NULL, &skip, 0, &err);
if (!ok)
error("%s", err.buf);
@@ -3043,8 +3043,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
BUG("initial ref transaction with old_sha1 set");
if (refs_verify_refname_available(&refs->base, update->refname,
- &affected_refnames, NULL,
- err)) {
+ &affected_refnames, NULL, 1, err)) {
ret = TRANSACTION_NAME_CONFLICT;
goto cleanup;
}
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 8e914afc817f198bed3199630b430e179cabc740..bbc779ab410b41f00759a3a41a76dd708f115c90 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1097,7 +1097,9 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
* at a later point.
*/
ret = refs_verify_refname_available(ref_store, u->refname,
- &affected_refnames, NULL, err);
+ &affected_refnames, NULL,
+ transaction->flags & REF_TRANSACTION_FLAG_INITIAL,
+ err);
if (ret < 0)
goto done;
@@ -1584,7 +1586,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
if (arg->delete_old)
string_list_insert(&skip, arg->oldname);
ret = refs_verify_refname_available(&arg->refs->base, arg->newname,
- NULL, &skip, &errbuf);
+ NULL, &skip, 0, &errbuf);
if (ret < 0) {
error("%s", errbuf.buf);
goto done;
diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c
index 65346dee551ccd781a88786f0c8465f60b286cde..240f6fc29d7f1bb20658deee467bcb46ac3407b2 100644
--- a/t/helper/test-ref-store.c
+++ b/t/helper/test-ref-store.c
@@ -199,7 +199,7 @@ static int cmd_verify_ref(struct ref_store *refs, const char **argv)
struct strbuf err = STRBUF_INIT;
int ret;
- ret = refs_verify_refname_available(refs, refname, NULL, NULL, &err);
+ ret = refs_verify_refname_available(refs, refname, NULL, NULL, 0, &err);
if (err.len)
puts(err.buf);
return ret;
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 06/10] refs: skip collision checks in initial transactions
2024-11-08 9:34 ` [PATCH 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
@ 2024-11-11 10:53 ` karthik nayak
0 siblings, 0 replies; 54+ messages in thread
From: karthik nayak @ 2024-11-11 10:53 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 3178 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
[snip]
> diff --git a/refs.c b/refs.c
> index 0f10c565bbb4e0d91210c52a3221a224e4264d28..d690eb19b3fd7083a5309deb98738547e4f48040 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs,
> const char *refname,
> const struct string_list *extras,
> const struct string_list *skip,
> + int initial_transaction,
> struct strbuf *err)
> {
> const char *slash;
> @@ -2332,8 +2333,6 @@ int refs_verify_refname_available(struct ref_store *refs,
> struct strbuf referent = STRBUF_INIT;
> struct object_id oid;
> unsigned int type;
> - struct ref_iterator *iter;
> - int ok;
> int ret = -1;
>
> /*
> @@ -2363,7 +2362,8 @@ int refs_verify_refname_available(struct ref_store *refs,
> if (skip && string_list_has_string(skip, dirname.buf))
> continue;
>
> - if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
> + if (!initial_transaction &&
> + !refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
Nit: It would be nice to have a comment here, perhaps something like:
/* in initial transaction there are no refs in the ref store */
> &type, &ignore_errno)) {
> strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
> dirname.buf, refname);
> @@ -2388,21 +2388,26 @@ int refs_verify_refname_available(struct ref_store *refs,
> strbuf_addstr(&dirname, refname + dirname.len);
> strbuf_addch(&dirname, '/');
>
> - iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
> - DO_FOR_EACH_INCLUDE_BROKEN);
> - while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
> - if (skip &&
> - string_list_has_string(skip, iter->refname))
> - continue;
> + if (!initial_transaction) {
> + struct ref_iterator *iter;
> + int ok;
>
> - strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
> - iter->refname, refname);
> - ref_iterator_abort(iter);
> - goto cleanup;
> - }
> + iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
> + DO_FOR_EACH_INCLUDE_BROKEN);
> + while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
> + if (skip &&
> + string_list_has_string(skip, iter->refname))
> + continue;
>
> - if (ok != ITER_DONE)
> - BUG("error while iterating over references");
> + strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
> + iter->refname, refname);
> + ref_iterator_abort(iter);
> + goto cleanup;
> + }
> +
> + if (ok != ITER_DONE)
> + BUG("error while iterating over references");
> + }
>
> extra_refname = find_descendant_ref(dirname.buf, extras, skip);
> if (extra_refname)
> diff --git a/refs.h b/refs.h
> index 024a370554e013d66febee635e4c0415ba061fe6..980bd20cf24e15144aeff991eeba8b27a936d386 100644
> --- a/refs.h
> +++ b/refs.h
> @@ -101,13 +101,16 @@ int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname,
> * both "foo" and with "foo/bar/baz" but not with "foo/bar" or
> * "foo/barbados".
> *
> + * If `initial_transaction` is truish, then all collision checks with
> + * preexisting refs are skipped.
> + *
Okay we have it here, so this should do.
[snip]
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (5 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
` (5 subsequent siblings)
12 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
When the `REF_SKIP_CREATE_REFLOG` flag is set we skip the creation of
the reflog entry, but we still normalize the reflog message when we
queue the update. This is a waste of resources as the normalized message
will never get used in the first place.
Fix this issue by skipping the normalization in case the flag is set.
This leads to a surprisingly large speedup when migrating from the
"files" to the "reftable" backend:
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 878.5 ms ± 14.9 ms [User: 726.5 ms, System: 139.2 ms]
Range (min … max): 858.4 ms … 941.3 ms 50 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 831.1 ms ± 10.5 ms [User: 694.1 ms, System: 126.3 ms]
Range (min … max): 812.4 ms … 851.4 ms 50 runs
Summary
migrate files:reftable (refcount = 1000000, revision = HEAD) ran
1.06 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)
And an ever larger speedup when migrating the other way round:
Benchmark 1: migrate reftable:files (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 923.6 ms ± 11.6 ms [User: 705.5 ms, System: 208.1 ms]
Range (min … max): 905.3 ms … 946.5 ms 50 runs
Benchmark 2: migrate reftable:files (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 818.5 ms ± 9.0 ms [User: 627.6 ms, System: 180.6 ms]
Range (min … max): 802.2 ms … 842.9 ms 50 runs
Summary
migrate reftable:files (refcount = 1000000, revision = HEAD) ran
1.13 ± 0.02 times faster than migrate reftable:files (refcount = 1000000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index d690eb19b3fd7083a5309deb98738547e4f48040..65eea3eb7734d03f09a22e8edfe5074d532398ff 100644
--- a/refs.c
+++ b/refs.c
@@ -1188,8 +1188,9 @@ struct ref_update *ref_transaction_add_update(
oidcpy(&update->new_oid, new_oid);
if ((flags & REF_HAVE_OLD) && old_oid)
oidcpy(&update->old_oid, old_oid);
+ if (!(flags & REF_SKIP_CREATE_REFLOG))
+ update->msg = normalize_reflog_message(msg);
- update->msg = normalize_reflog_message(msg);
return update;
}
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 08/10] reftable/writer: optimize allocations by using a scratch buffer
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (6 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 09/10] reftable/block: rename `block_writer::buf` variable Patrick Steinhardt
` (4 subsequent siblings)
12 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
Both `writer_add_record()` and `reftable_writer_add_ref()` get executed
for every single ref record we're adding to the reftable writer. And as
both functions use a local buffer to write data, the allocations we have
to do here add up during larger transactions.
Refactor the code to use a scratch buffer part of the `reftable_writer`
itself such that we can reuse it. This signifcantly reduces the number
of allocations during large transactions, e.g. when migrating refs from
the "files" backend to the "reftable" backend. Before this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 5,032,171 allocs, 5,032,122 frees, 418,792,092 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated
This also translate into a small speedup:
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 827.2 ms ± 16.5 ms [User: 689.4 ms, System: 124.9 ms]
Range (min … max): 809.0 ms … 924.7 ms 50 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 813.6 ms ± 11.6 ms [User: 679.0 ms, System: 123.4 ms]
Range (min … max): 786.7 ms … 833.5 ms 50 runs
Summary
migrate files:reftable (refcount = 1000000, revision = HEAD) ran
1.02 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/writer.c | 23 +++++++++++------------
reftable/writer.h | 1 +
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/reftable/writer.c b/reftable/writer.c
index fd136794d5a27b33b5017f36fbd6b095ab8dac5b..6501376ce826469556a7dfa3c39258847300ae66 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -148,6 +148,7 @@ int reftable_writer_new(struct reftable_writer **out,
reftable_buf_init(&wp->block_writer_data.last_key);
reftable_buf_init(&wp->last_key);
+ reftable_buf_init(&wp->buf);
REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size);
if (!wp->block) {
reftable_free(wp);
@@ -180,6 +181,7 @@ static void writer_release(struct reftable_writer *w)
w->block_writer = NULL;
writer_clear_index(w);
reftable_buf_release(&w->last_key);
+ reftable_buf_release(&w->buf);
}
}
@@ -249,20 +251,19 @@ static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *has
static int writer_add_record(struct reftable_writer *w,
struct reftable_record *rec)
{
- struct reftable_buf key = REFTABLE_BUF_INIT;
int err;
- err = reftable_record_key(rec, &key);
+ err = reftable_record_key(rec, &w->buf);
if (err < 0)
goto done;
- if (reftable_buf_cmp(&w->last_key, &key) >= 0) {
+ if (reftable_buf_cmp(&w->last_key, &w->buf) >= 0) {
err = REFTABLE_API_ERROR;
goto done;
}
reftable_buf_reset(&w->last_key);
- err = reftable_buf_add(&w->last_key, key.buf, key.len);
+ err = reftable_buf_add(&w->last_key, w->buf.buf, w->buf.len);
if (err < 0)
goto done;
@@ -312,7 +313,6 @@ static int writer_add_record(struct reftable_writer *w,
}
done:
- reftable_buf_release(&key);
return err;
}
@@ -325,7 +325,6 @@ int reftable_writer_add_ref(struct reftable_writer *w,
.ref = *ref
},
};
- struct reftable_buf buf = REFTABLE_BUF_INIT;
int err;
if (!ref->refname ||
@@ -340,24 +339,25 @@ int reftable_writer_add_ref(struct reftable_writer *w,
goto out;
if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) {
- err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref),
+ reftable_buf_reset(&w->buf);
+ err = reftable_buf_add(&w->buf, (char *)reftable_ref_record_val1(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
+ err = writer_index_hash(w, &w->buf);
if (err < 0)
goto out;
}
if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) {
- reftable_buf_reset(&buf);
- err = reftable_buf_add(&buf, reftable_ref_record_val2(ref),
+ reftable_buf_reset(&w->buf);
+ err = reftable_buf_add(&w->buf, reftable_ref_record_val2(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
+ err = writer_index_hash(w, &w->buf);
if (err < 0)
goto out;
}
@@ -365,7 +365,6 @@ int reftable_writer_add_ref(struct reftable_writer *w,
err = 0;
out:
- reftable_buf_release(&buf);
return err;
}
diff --git a/reftable/writer.h b/reftable/writer.h
index e8a6fbb78543e6e56920a2999601db0db9fe4d97..421a897dccd85ad0532860ff1b4f38b2813d438d 100644
--- a/reftable/writer.h
+++ b/reftable/writer.h
@@ -20,6 +20,7 @@ struct reftable_writer {
void *write_arg;
int pending_padding;
struct reftable_buf last_key;
+ struct reftable_buf buf;
/* offset of next block to write. */
uint64_t next;
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 09/10] reftable/block: rename `block_writer::buf` variable
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (7 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-08 9:34 ` [PATCH 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
` (3 subsequent siblings)
12 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
Adapt the name of the `block_writer::buf` variable to instead be called
`block`. This aligns it with the existing `block_len` variable, which
tracks the length of this buffer, and is generally a bit more tied to
the actual context where this variable gets used.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block.c | 20 ++++++++++----------
reftable/block.h | 8 ++++----
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/reftable/block.c b/reftable/block.c
index f5b432566a6b9f171a1f1374b6c892ab0696d744..3fa36c002a0c1852790780e74a6e055161f857d9 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -70,14 +70,14 @@ static int block_writer_register_restart(struct block_writer *w, int n,
return 0;
}
-int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
uint32_t block_size, uint32_t header_off, int hash_size)
{
- bw->buf = buf;
+ bw->block = block;
bw->hash_size = hash_size;
bw->block_size = block_size;
bw->header_off = header_off;
- bw->buf[header_off] = typ;
+ bw->block[header_off] = typ;
bw->next = header_off + 4;
bw->restart_interval = 16;
bw->entries = 0;
@@ -95,7 +95,7 @@ int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
uint8_t block_writer_type(struct block_writer *bw)
{
- return bw->buf[bw->header_off];
+ return bw->block[bw->header_off];
}
/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
@@ -107,7 +107,7 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
struct reftable_buf last =
w->entries % w->restart_interval == 0 ? empty : w->last_key;
struct string_view out = {
- .buf = w->buf + w->next,
+ .buf = w->block + w->next,
.len = w->block_size - w->next,
};
@@ -153,13 +153,13 @@ int block_writer_finish(struct block_writer *w)
{
int i;
for (i = 0; i < w->restart_len; i++) {
- put_be24(w->buf + w->next, w->restarts[i]);
+ put_be24(w->block + w->next, w->restarts[i]);
w->next += 3;
}
- put_be16(w->buf + w->next, w->restart_len);
+ put_be16(w->block + w->next, w->restart_len);
w->next += 2;
- put_be24(w->buf + 1 + w->header_off, w->next);
+ put_be24(w->block + 1 + w->header_off, w->next);
/*
* Log records are stored zlib-compressed. Note that the compression
@@ -188,7 +188,7 @@ int block_writer_finish(struct block_writer *w)
w->zstream->next_out = w->compressed;
w->zstream->avail_out = compressed_len;
- w->zstream->next_in = w->buf + block_header_skip;
+ w->zstream->next_in = w->block + block_header_skip;
w->zstream->avail_in = src_len;
/*
@@ -206,7 +206,7 @@ int block_writer_finish(struct block_writer *w)
* adjust the `next` pointer to point right after the
* compressed data.
*/
- memcpy(w->buf + block_header_skip, w->compressed,
+ memcpy(w->block + block_header_skip, w->compressed,
w->zstream->total_out);
w->next = w->zstream->total_out + block_header_skip;
}
diff --git a/reftable/block.h b/reftable/block.h
index 9a3effa513420039ee3f2834339c5082f64339d0..b3f837d612a8f0fbe98430b04e2dddaa975a15ab 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -22,7 +22,7 @@ struct block_writer {
unsigned char *compressed;
size_t compressed_cap;
- uint8_t *buf;
+ uint8_t *block;
uint32_t block_size;
/* Offset of the global header. Nonzero in the first block only. */
@@ -43,9 +43,9 @@ struct block_writer {
};
/*
- * initializes the blockwriter to write `typ` entries, using `buf` as temporary
- * storage. `buf` is not owned by the block_writer. */
-int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
+ * initializes the blockwriter to write `typ` entries, using `block` as temporary
+ * storage. `block` is not owned by the block_writer. */
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
uint32_t block_size, uint32_t header_off, int hash_size);
/* returns the block type (eg. 'r' for ref records. */
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH 10/10] reftable/block: optimize allocations by using scratch buffer
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (8 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 09/10] reftable/block: rename `block_writer::buf` variable Patrick Steinhardt
@ 2024-11-08 9:34 ` Patrick Steinhardt
2024-11-11 10:57 ` [PATCH 00/10] refs: optimize ref format migrations karthik nayak
` (2 subsequent siblings)
12 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-08 9:34 UTC (permalink / raw)
To: git; +Cc: Patrick Steinhardt
The block writer needs to compute the key for every record that one adds
to the writer. The buffer for this key is stored on the stack and thus
reallocated on every call to `block_writer_add()`, which is inefficient.
Refactor the code so that we store the buffer in the `block_writer`
struct itself so that we can reuse it. This reduces the number of
allocations when writing many refs, e.g. when migrating one million refs
from the "files" backend to the "reftable backend. Before this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 2,013,250 allocs, 2,013,201 frees, 347,543,583 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block.c | 13 +++++--------
reftable/block.h | 1 +
2 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/reftable/block.c b/reftable/block.c
index 3fa36c002a0c1852790780e74a6e055161f857d9..1aa7e8cd3cbf0980f6bc20262be89e755d0a4b4b 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -110,24 +110,21 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
.buf = w->block + w->next,
.len = w->block_size - w->next,
};
-
struct string_view start = out;
-
int is_restart = 0;
- struct reftable_buf key = REFTABLE_BUF_INIT;
int n = 0;
int err;
- err = reftable_record_key(rec, &key);
+ err = reftable_record_key(rec, &w->buf);
if (err < 0)
goto done;
- if (!key.len) {
+ if (!w->buf.len) {
err = REFTABLE_API_ERROR;
goto done;
}
- n = reftable_encode_key(&is_restart, out, last, key,
+ n = reftable_encode_key(&is_restart, out, last, w->buf,
reftable_record_val_type(rec));
if (n < 0) {
err = -1;
@@ -143,9 +140,8 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
string_view_consume(&out, n);
err = block_writer_register_restart(w, start.len - out.len, is_restart,
- &key);
+ &w->buf);
done:
- reftable_buf_release(&key);
return err;
}
@@ -569,6 +565,7 @@ void block_writer_release(struct block_writer *bw)
REFTABLE_FREE_AND_NULL(bw->zstream);
REFTABLE_FREE_AND_NULL(bw->restarts);
REFTABLE_FREE_AND_NULL(bw->compressed);
+ reftable_buf_release(&bw->buf);
reftable_buf_release(&bw->last_key);
/* the block is not owned. */
}
diff --git a/reftable/block.h b/reftable/block.h
index b3f837d612a8f0fbe98430b04e2dddaa975a15ab..d76f00553073c10185e716e71e2f415ce5dcf7e2 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -39,6 +39,7 @@ struct block_writer {
uint32_t restart_cap;
struct reftable_buf last_key;
+ struct reftable_buf buf;
int entries;
};
--
2.47.0.229.g8f8d6eee53.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH 00/10] refs: optimize ref format migrations
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (9 preceding siblings ...)
2024-11-08 9:34 ` [PATCH 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
@ 2024-11-11 10:57 ` karthik nayak
2024-11-11 12:53 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
12 siblings, 1 reply; 54+ messages in thread
From: karthik nayak @ 2024-11-11 10:57 UTC (permalink / raw)
To: Patrick Steinhardt, git
[-- Attachment #1: Type: text/plain, Size: 2975 bytes --]
Patrick Steinhardt <ps@pks.im> writes:
> Hi,
>
> I have recently learned that ref format migrations can take a
> significant amount of time in the order of minutes when migrating
> millions of refs. This is probably not entirely surprising: the initial
> focus for the logic to migrate ref backends was mostly focussed on
> getting the basic feature working, and I didn't yet invest any time into
> optimizing the code path at all. But I was still mildly surprised that
> the migration of a couple million refs was taking minutes to finish.
>
> This patch series thus optimizes how we migrate ref formats. This is
> mostly done by expanding upon the "initial transaction" semantics that
> we already use for git-clone(1). These semantics allow us to assume that
> the ref backend is completely empty and that there are no concurrent
> writers, and thus we are free to perform certain optimizations that
> wouldn't have otherwise been possible. On the one hand this allows us to
> drop needless collision checks. On the other hand, it also allows us to
> write regular refs directly into the "packed-refs" file when migrating
> from the "reftable" backend to the "files" backend.
>
> This leads to some significant speedups. Migrating 1 million refs from
> "files" to "reftable":
>
> Benchmark 1: migrate files:reftable (refcount = 1000000, revision = origin/master)
> Time (mean ± σ): 4.580 s ± 0.062 s [User: 1.818 s, System: 2.746 s]
> Range (min … max): 4.534 s … 4.743 s 10 runs
>
> Benchmark 2: migrate files:reftable (refcount = 1000000, revision = pks-refs-optimize-migrations)
> Time (mean ± σ): 767.7 ms ± 9.5 ms [User: 629.2 ms, System: 126.1 ms]
> Range (min … max): 755.8 ms … 786.9 ms 10 runs
>
> Summary
> migrate files:reftable (refcount = 1000000, revision = pks-refs-optimize-migrations) ran
> 5.97 ± 0.11 times faster than migrate files:reftable (refcount = 1000000, revision = origin/master)
>
> And migrating from "reftable" to "files:
>
> Benchmark 1: migrate reftable:files (refcount = 1000000, revision = origin/master)
> Time (mean ± σ): 35.409 s ± 0.302 s [User: 5.061 s, System: 29.244 s]
> Range (min … max): 35.055 s … 35.898 s 10 runs
>
> Benchmark 2: migrate reftable:files (refcount = 1000000, revision = pks-refs-optimize-migrations)
> Time (mean ± σ): 855.9 ms ± 61.5 ms [User: 646.7 ms, System: 187.1 ms]
> Range (min … max): 830.0 ms … 1030.3 ms 10 runs
>
> Summary
> migrate reftable:files (refcount = 1000000, revision = pks-refs-optimize-migrations) ran
> 41.37 ± 2.99 times faster than migrate reftable:files (refcount = 1000000, revision = origin/master)
>
> Thanks!
>
> Patrick
>
I read through the series, apart from a few small nits, the patches
look good and straightforward.
Thanks
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 690 bytes --]
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 00/10] refs: optimize ref format migrations
2024-11-11 10:57 ` [PATCH 00/10] refs: optimize ref format migrations karthik nayak
@ 2024-11-11 12:53 ` Patrick Steinhardt
2024-11-20 7:04 ` Junio C Hamano
0 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-11 12:53 UTC (permalink / raw)
To: karthik nayak; +Cc: git
On Mon, Nov 11, 2024 at 05:57:43AM -0500, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> I read through the series, apart from a few small nits, the patches
> look good and straightforward.
I've queued the single change to the first commit message locally, but
don't think that this is sufficient reason yet to reroll the patch
series, so I'll wait for additional feedback.
Thanks for your review!
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 00/10] refs: optimize ref format migrations
2024-11-11 12:53 ` Patrick Steinhardt
@ 2024-11-20 7:04 ` Junio C Hamano
2024-11-20 7:50 ` Patrick Steinhardt
0 siblings, 1 reply; 54+ messages in thread
From: Junio C Hamano @ 2024-11-20 7:04 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: karthik nayak, git
Patrick Steinhardt <ps@pks.im> writes:
> On Mon, Nov 11, 2024 at 05:57:43AM -0500, karthik nayak wrote:
>> Patrick Steinhardt <ps@pks.im> writes:
>> I read through the series, apart from a few small nits, the patches
>> look good and straightforward.
>
> I've queued the single change to the first commit message locally, but
> don't think that this is sufficient reason yet to reroll the patch
> series, so I'll wait for additional feedback.
It's been a week and a half, and since Karthik's reviews nothing has
happened. So, let's start looking at merging it with that minimum
update, perhaps?
Thanks.
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 00/10] refs: optimize ref format migrations
2024-11-20 7:04 ` Junio C Hamano
@ 2024-11-20 7:50 ` Patrick Steinhardt
2024-11-20 10:25 ` Christian Couder
0 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:50 UTC (permalink / raw)
To: Junio C Hamano; +Cc: karthik nayak, git
On Wed, Nov 20, 2024 at 04:04:55PM +0900, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
>
> > On Mon, Nov 11, 2024 at 05:57:43AM -0500, karthik nayak wrote:
> >> Patrick Steinhardt <ps@pks.im> writes:
> >> I read through the series, apart from a few small nits, the patches
> >> look good and straightforward.
> >
> > I've queued the single change to the first commit message locally, but
> > don't think that this is sufficient reason yet to reroll the patch
> > series, so I'll wait for additional feedback.
>
> It's been a week and a half, and since Karthik's reviews nothing has
> happened. So, let's start looking at merging it with that minimum
> update, perhaps?
Sure, let's do it that way. Will send the updated patch series in a
second, thanks!
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 00/10] refs: optimize ref format migrations
2024-11-20 7:50 ` Patrick Steinhardt
@ 2024-11-20 10:25 ` Christian Couder
2024-11-25 5:52 ` Patrick Steinhardt
0 siblings, 1 reply; 54+ messages in thread
From: Christian Couder @ 2024-11-20 10:25 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: Junio C Hamano, karthik nayak, git
On Wed, Nov 20, 2024 at 9:00 AM Patrick Steinhardt <ps@pks.im> wrote:
> Sure, let's do it that way. Will send the updated patch series in a
> second, thanks!
I took a look at the v2 and found a number of small nits. Not sure
it's worth a reroll though, as otherwise I found it great!
Thanks.
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH 00/10] refs: optimize ref format migrations
2024-11-20 10:25 ` Christian Couder
@ 2024-11-25 5:52 ` Patrick Steinhardt
0 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 5:52 UTC (permalink / raw)
To: Christian Couder; +Cc: Patrick Steinhardt, Junio C Hamano, karthik nayak, git
On Wed, Nov 20, 2024 at 11:25:26AM +0100, Christian Couder wrote:
> On Wed, Nov 20, 2024 at 9:00 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> > Sure, let's do it that way. Will send the updated patch series in a
> > second, thanks!
>
> I took a look at the v2 and found a number of small nits. Not sure
> it's worth a reroll though, as otherwise I found it great!
Thanks for your review, I'll send a v3 soonish!
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v2 00/10] refs: optimize ref format migrations
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (10 preceding siblings ...)
2024-11-11 10:57 ` [PATCH 00/10] refs: optimize ref format migrations karthik nayak
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
` (9 more replies)
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
12 siblings, 10 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Hi,
I have recently learned that ref format migrations can take a
significant amount of time in the order of minutes when migrating
millions of refs. This is probably not entirely surprising: the initial
focus for the logic to migrate ref backends was mostly focussed on
getting the basic feature working, and I didn't yet invest any time into
optimizing the code path at all. But I was still mildly surprised that
the migration of a couple million refs was taking minutes to finish.
This patch series thus optimizes how we migrate ref formats. This is
mostly done by expanding upon the "initial transaction" semantics that
we already use for git-clone(1). These semantics allow us to assume that
the ref backend is completely empty and that there are no concurrent
writers, and thus we are free to perform certain optimizations that
wouldn't have otherwise been possible. On the one hand this allows us to
drop needless collision checks. On the other hand, it also allows us to
write regular refs directly into the "packed-refs" file when migrating
from the "reftable" backend to the "files" backend.
This leads to some significant speedups. Migrating 1 million refs from
"files" to "reftable":
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = origin/master)
Time (mean ± σ): 4.580 s ± 0.062 s [User: 1.818 s, System: 2.746 s]
Range (min … max): 4.534 s … 4.743 s 10 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = pks-refs-optimize-migrations)
Time (mean ± σ): 767.7 ms ± 9.5 ms [User: 629.2 ms, System: 126.1 ms]
Range (min … max): 755.8 ms … 786.9 ms 10 runs
Summary
migrate files:reftable (refcount = 1000000, revision = pks-refs-optimize-migrations) ran
5.97 ± 0.11 times faster than migrate files:reftable (refcount = 1000000, revision = origin/master)
And migrating from "reftable" to "files:
Benchmark 1: migrate reftable:files (refcount = 1000000, revision = origin/master)
Time (mean ± σ): 35.409 s ± 0.302 s [User: 5.061 s, System: 29.244 s]
Range (min … max): 35.055 s … 35.898 s 10 runs
Benchmark 2: migrate reftable:files (refcount = 1000000, revision = pks-refs-optimize-migrations)
Time (mean ± σ): 855.9 ms ± 61.5 ms [User: 646.7 ms, System: 187.1 ms]
Range (min … max): 830.0 ms … 1030.3 ms 10 runs
Summary
migrate reftable:files (refcount = 1000000, revision = pks-refs-optimize-migrations) ran
41.37 ± 2.99 times faster than migrate reftable:files (refcount = 1000000, revision = origin/master)
Changes in v2:
- Mention in the first commit message that the introduced flag will be
used in a subsequent patch.
- Link to v1: https://lore.kernel.org/r/20241108-pks-refs-optimize-migrations-v1-0-7fd37fa80e35@pks.im
Thanks!
Patrick
---
Patrick Steinhardt (10):
refs: allow passing flags when setting up a transaction
refs/files: move logic to commit initial transaction
refs: introduce "initial" transaction flag
refs/files: support symbolic and root refs in initial transaction
refs: use "initial" transaction semantics to migrate refs
refs: skip collision checks in initial transactions
refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
reftable/writer: optimize allocations by using a scratch buffer
reftable/block: rename `block_writer::buf` variable
reftable/block: optimize allocations by using scratch buffer
branch.c | 2 +-
builtin/clone.c | 4 +-
builtin/fast-import.c | 4 +-
builtin/fetch.c | 4 +-
builtin/receive-pack.c | 4 +-
builtin/replace.c | 2 +-
builtin/tag.c | 2 +-
builtin/update-ref.c | 4 +-
refs.c | 70 ++++++-------
refs.h | 45 +++++----
refs/debug.c | 13 ---
refs/files-backend.c | 244 +++++++++++++++++++++++++---------------------
refs/packed-backend.c | 8 --
refs/refs-internal.h | 2 +-
refs/reftable-backend.c | 14 +--
reftable/block.c | 33 +++----
reftable/block.h | 9 +-
reftable/writer.c | 23 +++--
reftable/writer.h | 1 +
sequencer.c | 6 +-
t/helper/test-ref-store.c | 2 +-
t/t1460-refs-migrate.sh | 2 +-
walker.c | 2 +-
23 files changed, 247 insertions(+), 253 deletions(-)
Range-diff versus v1:
1: 059a17163a ! 1: 167a262a0c refs: allow passing flags when setting up a transaction
@@ Commit message
refs: allow passing flags when setting up a transaction
Allow passing flags when setting up a transaction such that the
- behaviour of the transaction itself can be altered. Adapt callers
- accordingly.
+ behaviour of the transaction itself can be altered. This functionality
+ will be used in a subsequent patch.
+
+ Adapt callers accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
2: 2acf64db6b = 2: 39255cdeff refs/files: move logic to commit initial transaction
3: bac7ca1d80 = 3: 061950cf39 refs: introduce "initial" transaction flag
4: 1101c9da75 = 4: a559563c57 refs/files: support symbolic and root refs in initial transaction
5: 75e769ec3e = 5: 8605fb50bc refs: use "initial" transaction semantics to migrate refs
6: a91fe4e2d5 = 6: eb9cacdaa6 refs: skip collision checks in initial transactions
7: 55901bd45f = 7: 6ac43c46b7 refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
8: d15006b445 = 8: 3b192c1e02 reftable/writer: optimize allocations by using a scratch buffer
9: 4b5603743f = 9: 96cd8b8287 reftable/block: rename `block_writer::buf` variable
10: 4dd22bdbef = 10: a36318ece6 reftable/block: optimize allocations by using scratch buffer
---
base-commit: facbe4f633e4ad31e641f64617bc88074c659959
change-id: 20241108-pks-refs-optimize-migrations-6d0ceee4abb7
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v2 01/10] refs: allow passing flags when setting up a transaction
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 10:19 ` Christian Couder
2024-11-20 7:51 ` [PATCH v2 02/10] refs/files: move logic to commit initial transaction Patrick Steinhardt
` (8 subsequent siblings)
9 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Allow passing flags when setting up a transaction such that the
behaviour of the transaction itself can be altered. This functionality
will be used in a subsequent patch.
Adapt callers accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
branch.c | 2 +-
builtin/clone.c | 2 +-
builtin/fast-import.c | 4 ++--
builtin/fetch.c | 4 ++--
builtin/receive-pack.c | 4 ++--
builtin/replace.c | 2 +-
builtin/tag.c | 2 +-
builtin/update-ref.c | 4 ++--
refs.c | 12 +++++++-----
refs.h | 3 ++-
refs/files-backend.c | 11 +++++++----
refs/refs-internal.h | 1 +
sequencer.c | 6 +++---
walker.c | 2 +-
14 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/branch.c b/branch.c
index 44977ad0aadbd40c878a0475ef2df2a20798936b..ebaa870c018747358255d3f150d136f0df447d5d 100644
--- a/branch.c
+++ b/branch.c
@@ -627,7 +627,7 @@ void create_branch(struct repository *r,
else
msg = xstrfmt("branch: Created from %s", start_name);
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf,
&oid, forcing ? NULL : null_oid(),
diff --git a/builtin/clone.c b/builtin/clone.c
index 59fcb317a68a77eee3ca96a60720c556e044c369..d963cc6eb5181e1af5bb29c07c3ee2fa24ad04ca 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs)
struct strbuf err = STRBUF_INIT;
t = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!t)
die("%s", err.buf);
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f141f42da988dddae0e549144d1379031..db82c37a06f05d25e0a8c34cbec055e6f9717191 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1634,7 +1634,7 @@ static int update_branch(struct branch *b)
}
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, b->name, &b->oid, &old_oid,
NULL, NULL, 0, msg, &err) ||
@@ -1669,7 +1669,7 @@ static void dump_tags(void)
struct ref_transaction *transaction;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
failure |= error("%s", err.buf);
goto cleanup;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index d9027e4dc9245a32a87c47d89f9a29fcfd42534c..68d291c7dd0a4fe805e3b006cc68b185481fba6b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -669,7 +669,7 @@ static int s_update_ref(const char *action,
*/
if (!transaction) {
transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
ret = STORE_REF_ERROR_OTHER;
goto out;
@@ -1671,7 +1671,7 @@ static int do_fetch(struct transport *transport,
if (atomic_fetch) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
retcode = -1;
goto cleanup;
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index ab5b20e39c5038cdf6e6f05f4d66278a9c1ac156..9d2c07f68dafb6fce87cb59abc1cbf27db9aae09 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1849,7 +1849,7 @@ static void execute_commands_non_atomic(struct command *commands,
continue;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
@@ -1878,7 +1878,7 @@ static void execute_commands_atomic(struct command *commands,
const char *reported_error = "atomic push failure";
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
diff --git a/builtin/replace.c b/builtin/replace.c
index a44f4e7ea9ff55dbd1f102aca5e5f5a046391328..a4eaadff91f1be107a8e0e25a701d2006ff8cac2 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -201,7 +201,7 @@ static int replace_object_oid(const char *object_ref,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf, repl, &prev,
NULL, NULL, 0, NULL, &err) ||
diff --git a/builtin/tag.c b/builtin/tag.c
index 93d10d59157d2ee1b41f90640bd162917f1eb162..3fa0ab111344dda477763a74d26076b6fb71a5ab 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -681,7 +681,7 @@ int cmd_tag(int argc,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf, &object, &prev,
NULL, NULL,
diff --git a/builtin/update-ref.c b/builtin/update-ref.c
index 8a98615dc8613a1be3b17c6d688ab9c0208ed003..670e7812d60ea88a20cd1c5cd113643095cb3be1 100644
--- a/builtin/update-ref.c
+++ b/builtin/update-ref.c
@@ -612,7 +612,7 @@ static void update_refs_stdin(void)
int i, j;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction)
die("%s", err.buf);
@@ -680,7 +680,7 @@ static void update_refs_stdin(void)
*/
state = cmd->state;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction)
die("%s", err.buf);
diff --git a/refs.c b/refs.c
index 5f729ed4124f7fe8fa9df7fd1f1ce951abefc585..9effeb01eb45728514eab0ca92404ea8cf2158d9 100644
--- a/refs.c
+++ b/refs.c
@@ -918,7 +918,7 @@ int refs_delete_ref(struct ref_store *refs, const char *msg,
struct ref_transaction *transaction;
struct strbuf err = STRBUF_INIT;
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction ||
ref_transaction_delete(transaction, refname, old_oid,
NULL, flags, msg, &err) ||
@@ -1116,6 +1116,7 @@ int read_ref_at(struct ref_store *refs, const char *refname,
}
struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
+ unsigned int flags,
struct strbuf *err)
{
struct ref_transaction *tr;
@@ -1123,6 +1124,7 @@ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
CALLOC_ARRAY(tr, 1);
tr->ref_store = refs;
+ tr->flags = flags;
return tr;
}
@@ -1309,7 +1311,7 @@ int refs_update_ref(struct ref_store *refs, const char *msg,
struct strbuf err = STRBUF_INIT;
int ret = 0;
- t = ref_store_transaction_begin(refs, &err);
+ t = ref_store_transaction_begin(refs, 0, &err);
if (!t ||
ref_transaction_update(t, refname, new_oid, old_oid, NULL, NULL,
flags, msg, &err) ||
@@ -2120,7 +2122,7 @@ int refs_update_symref(struct ref_store *refs, const char *ref,
struct strbuf err = STRBUF_INIT;
int ret = 0;
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref, NULL, NULL,
target, NULL, REF_NO_DEREF,
@@ -2527,7 +2529,7 @@ int refs_delete_refs(struct ref_store *refs, const char *logmsg,
* individual updates can't fail, so we can pack all of the
* updates into a single transaction.
*/
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction) {
ret = error("%s", err.buf);
goto out;
@@ -2833,7 +2835,7 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- transaction = ref_store_transaction_begin(new_refs, errbuf);
+ transaction = ref_store_transaction_begin(new_refs, 0, errbuf);
if (!transaction)
goto done;
diff --git a/refs.h b/refs.h
index 108dfc93b3428db491916ad8a55daea649d83ffd..9821f3e80d900b31c3dede489c2f415d968233d7 100644
--- a/refs.h
+++ b/refs.h
@@ -234,7 +234,7 @@ char *repo_default_branch_name(struct repository *r, int quiet);
* struct strbuf err = STRBUF_INIT;
* int ret = 0;
*
- * transaction = ref_store_transaction_begin(refs, &err);
+ * transaction = ref_store_transaction_begin(refs, 0, &err);
* if (!transaction ||
* ref_transaction_update(...) ||
* ref_transaction_create(...) ||
@@ -584,6 +584,7 @@ enum action_on_err {
* be freed by calling ref_transaction_free().
*/
struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
+ unsigned int flags,
struct strbuf *err);
/*
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 0824c0b8a946909791da36ddb4171db4ad98913b..df61057c9f24972b72644407cc95057891338d96 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1252,7 +1252,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r)
if (check_refname_format(r->name, 0))
return;
- transaction = ref_store_transaction_begin(&refs->base, &err);
+ transaction = ref_store_transaction_begin(&refs->base, 0, &err);
if (!transaction)
goto cleanup;
ref_transaction_add_update(
@@ -1396,7 +1396,8 @@ static int files_pack_refs(struct ref_store *ref_store,
if (!should_pack_refs(refs, opts))
return 0;
- transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
+ transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ 0, &err);
if (!transaction)
return -1;
@@ -2867,7 +2868,8 @@ static int files_transaction_prepare(struct ref_store *ref_store,
*/
if (!packed_transaction) {
packed_transaction = ref_store_transaction_begin(
- refs->packed_ref_store, err);
+ refs->packed_ref_store,
+ transaction->flags, err);
if (!packed_transaction) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
@@ -3174,7 +3176,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
&affected_refnames))
BUG("initial ref transaction called with existing refs");
- packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, err);
+ packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ transaction->flags, err);
if (!packed_transaction) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index 2313c830d8facaa17b0b4b073df0de958023062a..dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -193,6 +193,7 @@ struct ref_transaction {
size_t nr;
enum ref_transaction_state state;
void *backend_data;
+ unsigned int flags;
};
/*
diff --git a/sequencer.c b/sequencer.c
index 353d804999b88d5fd5dcf1254d80781f20e62f2e..287f4e5e8766327da6f31e87ce0c20f63b302b77 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -662,7 +662,7 @@ static int fast_forward_to(struct repository *r,
strbuf_addf(&sb, "%s: fast-forward", action_name(opts));
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, "HEAD",
to, unborn && !is_rebase_i(opts) ?
@@ -1297,7 +1297,7 @@ int update_head_with_reflog(const struct commit *old_head,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- err);
+ 0, err);
if (!transaction ||
ref_transaction_update(transaction, "HEAD", new_head,
old_head ? &old_head->object.oid : null_oid(),
@@ -3890,7 +3890,7 @@ static int do_label(struct repository *r, const char *name, int len)
strbuf_addf(&ref_name, "refs/rewritten/%.*s", len, name);
strbuf_addf(&msg, "rebase (label) '%.*s'", len, name);
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction) {
error("%s", err.buf);
ret = -1;
diff --git a/walker.c b/walker.c
index 5ea7e5b392b2bd49f249a9acc8d7ce8779357e1b..7cc9dbea46d64d6bd3336025d640f284a6202157 100644
--- a/walker.c
+++ b/walker.c
@@ -290,7 +290,7 @@ int walker_fetch(struct walker *walker, int targets, char **target,
if (write_ref) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
error("%s", err.buf);
goto done;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH v2 01/10] refs: allow passing flags when setting up a transaction
2024-11-20 7:51 ` [PATCH v2 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
@ 2024-11-20 10:19 ` Christian Couder
0 siblings, 0 replies; 54+ messages in thread
From: Christian Couder @ 2024-11-20 10:19 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, karthik nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 8:54 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> Allow passing flags when setting up a transaction such that the
> behaviour of the transaction itself can be altered. This functionality
> will be used in a subsequent patch.
>
> Adapt callers accordingly.
From reading the above, it might seem that the change is only about
adding a new 'flags' argument to the ref_store_transaction_begin()
function that sets up a transaction. When looking at the patch though,
it appears that it's also about adding a 'flags' field to 'struct
ref_transaction'. It might be nice to mention that in the commit
message too.
> diff --git a/refs/refs-internal.h b/refs/refs-internal.h
> index 2313c830d8facaa17b0b4b073df0de958023062a..dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee 100644
> --- a/refs/refs-internal.h
> +++ b/refs/refs-internal.h
> @@ -193,6 +193,7 @@ struct ref_transaction {
> size_t nr;
> enum ref_transaction_state state;
> void *backend_data;
> + unsigned int flags;
> };
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v2 02/10] refs/files: move logic to commit initial transaction
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 03/10] refs: introduce "initial" transaction flag Patrick Steinhardt
` (7 subsequent siblings)
9 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Move the logic to commit initial transactions such that we can start to
call it in `files_transaction_finish()` in a subsequent commit without
requiring a separate function declaration.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs/files-backend.c | 202 +++++++++++++++++++++++++--------------------------
1 file changed, 101 insertions(+), 101 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index df61057c9f24972b72644407cc95057891338d96..f37c805a34167b3749fbe724788180975abdae90 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2975,6 +2975,107 @@ static int parse_and_write_reflog(struct files_ref_store *refs,
return 0;
}
+static int ref_present(const char *refname, const char *referent UNUSED,
+ const struct object_id *oid UNUSED,
+ int flags UNUSED,
+ void *cb_data)
+{
+ struct string_list *affected_refnames = cb_data;
+
+ return string_list_has_string(affected_refnames, refname);
+}
+
+static int files_initial_transaction_commit(struct ref_store *ref_store,
+ struct ref_transaction *transaction,
+ struct strbuf *err)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_WRITE,
+ "initial_ref_transaction_commit");
+ size_t i;
+ int ret = 0;
+ struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
+ struct ref_transaction *packed_transaction = NULL;
+
+ assert(err);
+
+ if (transaction->state != REF_TRANSACTION_OPEN)
+ BUG("commit called for transaction that is not open");
+
+ /* Fail if a refname appears more than once in the transaction: */
+ for (i = 0; i < transaction->nr; i++)
+ string_list_append(&affected_refnames,
+ transaction->updates[i]->refname);
+ string_list_sort(&affected_refnames);
+ if (ref_update_reject_duplicates(&affected_refnames, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ /*
+ * It's really undefined to call this function in an active
+ * repository or when there are existing references: we are
+ * only locking and changing packed-refs, so (1) any
+ * simultaneous processes might try to change a reference at
+ * the same time we do, and (2) any existing loose versions of
+ * the references that we are setting would have precedence
+ * over our values. But some remote helpers create the remote
+ * "HEAD" and "master" branches before calling this function,
+ * so here we really only check that none of the references
+ * that we are creating already exists.
+ */
+ if (refs_for_each_rawref(&refs->base, ref_present,
+ &affected_refnames))
+ BUG("initial ref transaction called with existing refs");
+
+ packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ transaction->flags, err);
+ if (!packed_transaction) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ for (i = 0; i < transaction->nr; i++) {
+ struct ref_update *update = transaction->updates[i];
+
+ if ((update->flags & REF_HAVE_OLD) &&
+ !is_null_oid(&update->old_oid))
+ BUG("initial ref transaction with old_sha1 set");
+ if (refs_verify_refname_available(&refs->base, update->refname,
+ &affected_refnames, NULL,
+ err)) {
+ ret = TRANSACTION_NAME_CONFLICT;
+ goto cleanup;
+ }
+
+ /*
+ * Add a reference creation for this reference to the
+ * packed-refs transaction:
+ */
+ ref_transaction_add_update(packed_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ &update->new_oid, &update->old_oid,
+ NULL, NULL, NULL);
+ }
+
+ if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ if (initial_ref_transaction_commit(packed_transaction, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ }
+
+ packed_refs_unlock(refs->packed_ref_store);
+cleanup:
+ if (packed_transaction)
+ ref_transaction_free(packed_transaction);
+ transaction->state = REF_TRANSACTION_CLOSED;
+ string_list_clear(&affected_refnames, 0);
+ return ret;
+}
+
static int files_transaction_finish(struct ref_store *ref_store,
struct ref_transaction *transaction,
struct strbuf *err)
@@ -3123,107 +3224,6 @@ static int files_transaction_abort(struct ref_store *ref_store,
return 0;
}
-static int ref_present(const char *refname, const char *referent UNUSED,
- const struct object_id *oid UNUSED,
- int flags UNUSED,
- void *cb_data)
-{
- struct string_list *affected_refnames = cb_data;
-
- return string_list_has_string(affected_refnames, refname);
-}
-
-static int files_initial_transaction_commit(struct ref_store *ref_store,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct files_ref_store *refs =
- files_downcast(ref_store, REF_STORE_WRITE,
- "initial_ref_transaction_commit");
- size_t i;
- int ret = 0;
- struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
- struct ref_transaction *packed_transaction = NULL;
-
- assert(err);
-
- if (transaction->state != REF_TRANSACTION_OPEN)
- BUG("commit called for transaction that is not open");
-
- /* Fail if a refname appears more than once in the transaction: */
- for (i = 0; i < transaction->nr; i++)
- string_list_append(&affected_refnames,
- transaction->updates[i]->refname);
- string_list_sort(&affected_refnames);
- if (ref_update_reject_duplicates(&affected_refnames, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- /*
- * It's really undefined to call this function in an active
- * repository or when there are existing references: we are
- * only locking and changing packed-refs, so (1) any
- * simultaneous processes might try to change a reference at
- * the same time we do, and (2) any existing loose versions of
- * the references that we are setting would have precedence
- * over our values. But some remote helpers create the remote
- * "HEAD" and "master" branches before calling this function,
- * so here we really only check that none of the references
- * that we are creating already exists.
- */
- if (refs_for_each_rawref(&refs->base, ref_present,
- &affected_refnames))
- BUG("initial ref transaction called with existing refs");
-
- packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
- transaction->flags, err);
- if (!packed_transaction) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- for (i = 0; i < transaction->nr; i++) {
- struct ref_update *update = transaction->updates[i];
-
- if ((update->flags & REF_HAVE_OLD) &&
- !is_null_oid(&update->old_oid))
- BUG("initial ref transaction with old_sha1 set");
- if (refs_verify_refname_available(&refs->base, update->refname,
- &affected_refnames, NULL,
- err)) {
- ret = TRANSACTION_NAME_CONFLICT;
- goto cleanup;
- }
-
- /*
- * Add a reference creation for this reference to the
- * packed-refs transaction:
- */
- ref_transaction_add_update(packed_transaction, update->refname,
- update->flags & ~REF_HAVE_OLD,
- &update->new_oid, &update->old_oid,
- NULL, NULL, NULL);
- }
-
- if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- if (initial_ref_transaction_commit(packed_transaction, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- }
-
- packed_refs_unlock(refs->packed_ref_store);
-cleanup:
- if (packed_transaction)
- ref_transaction_free(packed_transaction);
- transaction->state = REF_TRANSACTION_CLOSED;
- string_list_clear(&affected_refnames, 0);
- return ret;
-}
-
struct expire_reflog_cb {
reflog_expiry_should_prune_fn *should_prune_fn;
void *policy_cb;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v2 03/10] refs: introduce "initial" transaction flag
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 02/10] refs/files: move logic to commit initial transaction Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
` (6 subsequent siblings)
9 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
There are two different ways to commit a transaction:
- `ref_transaction_commit()` can be used to commit a regular
transaction and is what almost every caller wants.
- `initial_ref_transaction_commit()` can be used when it is known that
the ref store that the transaction is committed for is empty and
when there are no concurrent processes. This is used when cloning a
new repository.
Implementing this via two separate functions has a couple of downsides.
First, every reference backend needs to implement a separate callback
even in the case where they don't special-case the initial transaction.
Second, backends are basically forced to reimplement the whole logic for
how to commit the transaction like the "files" backend does, even though
backends may wish to only tweak certain behaviour of a "normal" commit.
Third, it is awkward that callers must never prepare the transaction as
this is somewhat different than how a transaction typically works.
Refactor the code such that we instead mark initial transactions via a
separate flag when starting the transaction. This addresses all of the
mentioned painpoints, where the most important part is that it will
allow backends to have way more leeway in how exactly they want to
handle the initial transaction.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/clone.c | 4 ++--
refs.c | 10 +---------
refs.h | 37 ++++++++++++++++++-------------------
refs/debug.c | 13 -------------
refs/files-backend.c | 16 ++++++++--------
refs/packed-backend.c | 8 --------
refs/refs-internal.h | 1 -
refs/reftable-backend.c | 8 --------
8 files changed, 29 insertions(+), 68 deletions(-)
diff --git a/builtin/clone.c b/builtin/clone.c
index d963cc6eb5181e1af5bb29c07c3ee2fa24ad04ca..8427ccec17e77b23ee1a7f526b80a52d79a71873 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs)
struct strbuf err = STRBUF_INIT;
t = ref_store_transaction_begin(get_main_ref_store(the_repository),
- 0, &err);
+ REF_TRANSACTION_FLAG_INITIAL, &err);
if (!t)
die("%s", err.buf);
@@ -586,7 +586,7 @@ static void write_remote_refs(const struct ref *local_refs)
die("%s", err.buf);
}
- if (initial_ref_transaction_commit(t, &err))
+ if (ref_transaction_commit(t, &err))
die("%s", err.buf);
strbuf_release(&err);
diff --git a/refs.c b/refs.c
index 9effeb01eb45728514eab0ca92404ea8cf2158d9..8b9dfc6173fd144fe9a172cb81cf33057ae31368 100644
--- a/refs.c
+++ b/refs.c
@@ -2315,7 +2315,7 @@ int ref_transaction_commit(struct ref_transaction *transaction,
}
ret = refs->be->transaction_finish(refs, transaction, err);
- if (!ret)
+ if (!ret && !(transaction->flags & REF_TRANSACTION_FLAG_INITIAL))
run_transaction_hook(transaction, "committed");
return ret;
}
@@ -2486,14 +2486,6 @@ int refs_reflog_expire(struct ref_store *refs,
cleanup_fn, policy_cb_data);
}
-int initial_ref_transaction_commit(struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct ref_store *refs = transaction->ref_store;
-
- return refs->be->initial_transaction_commit(refs, transaction, err);
-}
-
void ref_transaction_for_each_queued_update(struct ref_transaction *transaction,
ref_transaction_for_each_queued_update_fn cb,
void *cb_data)
diff --git a/refs.h b/refs.h
index 9821f3e80d900b31c3dede489c2f415d968233d7..024a370554e013d66febee635e4c0415ba061fe6 100644
--- a/refs.h
+++ b/refs.h
@@ -214,11 +214,9 @@ char *repo_default_branch_name(struct repository *r, int quiet);
*
* Or
*
- * - Call `initial_ref_transaction_commit()` if the ref database is
- * known to be empty and have no other writers (e.g. during
- * clone). This is likely to be much faster than
- * `ref_transaction_commit()`. `ref_transaction_prepare()` should
- * *not* be called before `initial_ref_transaction_commit()`.
+ * - Call `ref_transaction_begin()` with REF_TRANSACTION_FLAG_INITIAL if the
+ * ref database is known to be empty and have no other writers (e.g. during
+ * clone). This is likely to be much faster than without the flag.
*
* - Then finally, call `ref_transaction_free()` to free the
* `ref_transaction` data structure.
@@ -579,6 +577,21 @@ enum action_on_err {
UPDATE_REFS_QUIET_ON_ERR
};
+enum ref_transaction_flag {
+ /*
+ * The ref transaction is part of the initial creation of the ref store
+ * and can thus assume that the ref store is completely empty. This
+ * allows the backend to perform the transaction more efficiently by
+ * skipping certain checks.
+ *
+ * It is a bug to set this flag when there might be other processes
+ * accessing the repository or if there are existing references that
+ * might conflict with the ones being created. All old_oid values must
+ * either be absent or null_oid.
+ */
+ REF_TRANSACTION_FLAG_INITIAL = (1 << 0),
+};
+
/*
* Begin a reference transaction. The reference transaction must
* be freed by calling ref_transaction_free().
@@ -798,20 +811,6 @@ int ref_transaction_commit(struct ref_transaction *transaction,
int ref_transaction_abort(struct ref_transaction *transaction,
struct strbuf *err);
-/*
- * Like ref_transaction_commit(), but optimized for creating
- * references when originally initializing a repository (e.g., by "git
- * clone"). It writes the new references directly to packed-refs
- * without locking the individual references.
- *
- * It is a bug to call this function when there might be other
- * processes accessing the repository or if there are existing
- * references that might conflict with the ones being created. All
- * old_oid values must either be absent or null_oid.
- */
-int initial_ref_transaction_commit(struct ref_transaction *transaction,
- struct strbuf *err);
-
/*
* Execute the given callback function for each of the reference updates which
* have been queued in the given transaction. `old_oid` and `new_oid` may be
diff --git a/refs/debug.c b/refs/debug.c
index 45e2e784a0f8c49f492eaa9d371aa44f8c7916c3..cbac62c8a4f924580e80f106f87639daf77cef5c 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -118,18 +118,6 @@ static int debug_transaction_abort(struct ref_store *refs,
return res;
}
-static int debug_initial_transaction_commit(struct ref_store *refs,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct debug_ref_store *drefs = (struct debug_ref_store *)refs;
- int res;
- transaction->ref_store = drefs->refs;
- res = drefs->refs->be->initial_transaction_commit(drefs->refs,
- transaction, err);
- return res;
-}
-
static int debug_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts)
{
struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
@@ -443,7 +431,6 @@ struct ref_storage_be refs_be_debug = {
.transaction_prepare = debug_transaction_prepare,
.transaction_finish = debug_transaction_finish,
.transaction_abort = debug_transaction_abort,
- .initial_transaction_commit = debug_initial_transaction_commit,
.pack_refs = debug_pack_refs,
.rename_ref = debug_rename_ref,
diff --git a/refs/files-backend.c b/refs/files-backend.c
index f37c805a34167b3749fbe724788180975abdae90..3ed18475a72aa4798d15b2912c37b4caabd47aac 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2781,6 +2781,8 @@ static int files_transaction_prepare(struct ref_store *ref_store,
assert(err);
+ if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL)
+ goto cleanup;
if (!transaction->nr)
goto cleanup;
@@ -2985,13 +2987,10 @@ static int ref_present(const char *refname, const char *referent UNUSED,
return string_list_has_string(affected_refnames, refname);
}
-static int files_initial_transaction_commit(struct ref_store *ref_store,
+static int files_transaction_finish_initial(struct files_ref_store *refs,
struct ref_transaction *transaction,
struct strbuf *err)
{
- struct files_ref_store *refs =
- files_downcast(ref_store, REF_STORE_WRITE,
- "initial_ref_transaction_commit");
size_t i;
int ret = 0;
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
@@ -2999,8 +2998,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
assert(err);
- if (transaction->state != REF_TRANSACTION_OPEN)
- BUG("commit called for transaction that is not open");
+ if (transaction->state != REF_TRANSACTION_PREPARED)
+ BUG("commit called for transaction that is not prepared");
/* Fail if a refname appears more than once in the transaction: */
for (i = 0; i < transaction->nr; i++)
@@ -3063,7 +3062,7 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
goto cleanup;
}
- if (initial_ref_transaction_commit(packed_transaction, err)) {
+ if (ref_transaction_commit(packed_transaction, err)) {
ret = TRANSACTION_GENERIC_ERROR;
}
@@ -3091,6 +3090,8 @@ static int files_transaction_finish(struct ref_store *ref_store,
assert(err);
+ if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL)
+ return files_transaction_finish_initial(refs, transaction, err);
if (!transaction->nr) {
transaction->state = REF_TRANSACTION_CLOSED;
return 0;
@@ -3617,7 +3618,6 @@ struct ref_storage_be refs_be_files = {
.transaction_prepare = files_transaction_prepare,
.transaction_finish = files_transaction_finish,
.transaction_abort = files_transaction_abort,
- .initial_transaction_commit = files_initial_transaction_commit,
.pack_refs = files_pack_refs,
.rename_ref = files_rename_ref,
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 07c57fd541a5039d5fcb93d9bf78e1916f67b219..794471de60c78494f1f2b0e9de013422e3e7625d 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1730,13 +1730,6 @@ static int packed_transaction_finish(struct ref_store *ref_store,
return ret;
}
-static int packed_initial_transaction_commit(struct ref_store *ref_store UNUSED,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- return ref_transaction_commit(transaction, err);
-}
-
static int packed_pack_refs(struct ref_store *ref_store UNUSED,
struct pack_refs_opts *pack_opts UNUSED)
{
@@ -1769,7 +1762,6 @@ struct ref_storage_be refs_be_packed = {
.transaction_prepare = packed_transaction_prepare,
.transaction_finish = packed_transaction_finish,
.transaction_abort = packed_transaction_abort,
- .initial_transaction_commit = packed_initial_transaction_commit,
.pack_refs = packed_pack_refs,
.rename_ref = NULL,
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee..33335d54c9162755c70174093017439c0018f36d 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -666,7 +666,6 @@ struct ref_storage_be {
ref_transaction_prepare_fn *transaction_prepare;
ref_transaction_finish_fn *transaction_finish;
ref_transaction_abort_fn *transaction_abort;
- ref_transaction_commit_fn *initial_transaction_commit;
pack_refs_fn *pack_refs;
rename_ref_fn *rename_ref;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 38eb14d591ec0c7c221b6b0f7483e547748e7f1c..8e914afc817f198bed3199630b430e179cabc740 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1490,13 +1490,6 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED,
return ret;
}
-static int reftable_be_initial_transaction_commit(struct ref_store *ref_store UNUSED,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- return ref_transaction_commit(transaction, err);
-}
-
static int reftable_be_pack_refs(struct ref_store *ref_store,
struct pack_refs_opts *opts)
{
@@ -2490,7 +2483,6 @@ struct ref_storage_be refs_be_reftable = {
.transaction_prepare = reftable_be_transaction_prepare,
.transaction_finish = reftable_be_transaction_finish,
.transaction_abort = reftable_be_transaction_abort,
- .initial_transaction_commit = reftable_be_initial_transaction_commit,
.pack_refs = reftable_be_pack_refs,
.rename_ref = reftable_be_rename_ref,
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v2 04/10] refs/files: support symbolic and root refs in initial transaction
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (2 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 03/10] refs: introduce "initial" transaction flag Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
` (5 subsequent siblings)
9 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
The "files" backend has implemented special logic when committing
the first transactions in an otherwise empty ref store: instead of
writing all refs as separate loose files, it instead knows to write them
all into a "packed-refs" file directly. This is significantly more
efficient than having to write each of the refs as separate "loose" ref.
The only user of this optimization is git-clone(1), which only uses this
mechanism to write regular refs. Consequently, the implementation does
not know how to handle both symbolic and root refs. While fine in the
context of git-clone(1), this keeps us from using the mechanism in more
cases.
Adapt the logic to also support symbolic and root refs by using a second
transaction that we use for all of the refs that need to be written as
loose refs.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs/files-backend.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 3ed18475a72aa4798d15b2912c37b4caabd47aac..116d4259697b20583cb2db34ed47025e8781cd42 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2995,6 +2995,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
int ret = 0;
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
struct ref_transaction *packed_transaction = NULL;
+ struct ref_transaction *loose_transaction = NULL;
assert(err);
@@ -3040,6 +3041,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
if ((update->flags & REF_HAVE_OLD) &&
!is_null_oid(&update->old_oid))
BUG("initial ref transaction with old_sha1 set");
+
if (refs_verify_refname_available(&refs->base, update->refname,
&affected_refnames, NULL,
err)) {
@@ -3048,26 +3050,48 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
}
/*
- * Add a reference creation for this reference to the
- * packed-refs transaction:
+ * packed-refs don't support symbolic refs and root refs, so we
+ * have to queue these references via the loose transaction.
*/
- ref_transaction_add_update(packed_transaction, update->refname,
- update->flags & ~REF_HAVE_OLD,
- &update->new_oid, &update->old_oid,
- NULL, NULL, NULL);
+ if (update->new_target || is_root_ref(update->refname)) {
+ if (!loose_transaction) {
+ loose_transaction = ref_store_transaction_begin(&refs->base, 0, err);
+ if (!loose_transaction) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+ }
+
+ ref_transaction_add_update(loose_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ update->new_target ? NULL : &update->new_oid, NULL,
+ update->new_target, NULL, NULL);
+ } else {
+ ref_transaction_add_update(packed_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ &update->new_oid, &update->old_oid,
+ NULL, NULL, NULL);
+ }
}
- if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
+ if (packed_refs_lock(refs->packed_ref_store, 0, err) ||
+ ref_transaction_commit(packed_transaction, err)) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
}
+ packed_refs_unlock(refs->packed_ref_store);
- if (ref_transaction_commit(packed_transaction, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
+ if (loose_transaction) {
+ if (ref_transaction_prepare(loose_transaction, err) ||
+ ref_transaction_commit(loose_transaction, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
}
- packed_refs_unlock(refs->packed_ref_store);
cleanup:
+ if (loose_transaction)
+ ref_transaction_free(loose_transaction);
if (packed_transaction)
ref_transaction_free(packed_transaction);
transaction->state = REF_TRANSACTION_CLOSED;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v2 05/10] refs: use "initial" transaction semantics to migrate refs
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (3 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
` (4 subsequent siblings)
9 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Until now, we couldn't use "initial" transaction semantics to migrate
refs because the "files" backend only supported writing regular refs via
the initial transaction because it simply mapped the transaction to a
"packed-refs" transaction. But with the preceding commit, the "files"
backend has learned to also write symbolic and root refs in the initial
transaction by creating a second transaction for all refs that need to
be written as loose refs.
Adapt the code to migrate refs to commit the transaction as an initial
transaction. This results in a signiticant speedup when migrating many
refs:
Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 3.247 s ± 0.034 s [User: 0.485 s, System: 2.722 s]
Range (min … max): 3.216 s … 3.309 s 10 runs
Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
Time (mean ± σ): 453.6 ms ± 1.9 ms [User: 214.6 ms, System: 230.5 ms]
Range (min … max): 451.5 ms … 456.4 ms 10 runs
Summary
migrate reftable:files (refcount = 100000, revision = HEAD) ran
7.16 ± 0.08 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
As the reftable backend doesn't (yet) special-case initial transactions
there is no comparable speedup for that backend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 10 ++--------
t/t1460-refs-migrate.sh | 2 +-
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/refs.c b/refs.c
index 8b9dfc6173fd144fe9a172cb81cf33057ae31368..0f10c565bbb4e0d91210c52a3221a224e4264d28 100644
--- a/refs.c
+++ b/refs.c
@@ -2827,7 +2827,8 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- transaction = ref_store_transaction_begin(new_refs, 0, errbuf);
+ transaction = ref_store_transaction_begin(new_refs, REF_TRANSACTION_FLAG_INITIAL,
+ errbuf);
if (!transaction)
goto done;
@@ -2852,13 +2853,6 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- /*
- * TODO: we might want to migrate to `initial_ref_transaction_commit()`
- * here, which is more efficient for the files backend because it would
- * write new refs into the packed-refs file directly. At this point,
- * the files backend doesn't handle pseudo-refs and symrefs correctly
- * though, so this requires some more work.
- */
ret = ref_transaction_commit(transaction, errbuf);
if (ret < 0)
goto done;
diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh
index f7c0783d30ccd61b0fee67c115193b42bb0e2c77..b90b38a87f7bb905afeeceb4f9a3bfc8b772e16a 100755
--- a/t/t1460-refs-migrate.sh
+++ b/t/t1460-refs-migrate.sh
@@ -237,7 +237,7 @@ test_expect_success 'migrating from reftable format deletes backend files' '
test_path_is_missing repo/.git/reftable &&
echo "ref: refs/heads/main" >expect &&
test_cmp expect repo/.git/HEAD &&
- test_path_is_file repo/.git/refs/heads/main
+ test_path_is_file repo/.git/packed-refs
'
test_done
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v2 06/10] refs: skip collision checks in initial transactions
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (4 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 10:21 ` Christian Couder
2024-11-20 10:42 ` Kristoffer Haugsbakk
2024-11-20 7:51 ` [PATCH v2 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` Patrick Steinhardt
` (3 subsequent siblings)
9 siblings, 2 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Reference transactions use `refs_verify_refname_available()` to check
for colliding references. This check consists of two parts:
- Checks for whether multiple ref updates in the same transaction
conflict with each other.
- Checks for whether existing refs conflict with any refs part of the
transaction.
While we generally cannot avoid the first check, the second check is
superfluous in cases where the transaction is an initial one in an
otherwise empty ref store. The check results in multiple ref reads as
well as the creation of a ref iterator for every ref we're checking,
which adds up quite fast when performing the check for many refs.
Introduce a new flag that allows us to skip this check and wire it up in
such that the backends pass it when running an initial transaction. This
leads to significant speedups when migrating ref storage backends. From
"files" to "reftable":
Benchmark 1: migrate files:reftable (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 472.4 ms ± 6.7 ms [User: 175.9 ms, System: 285.2 ms]
Range (min … max): 463.5 ms … 483.2 ms 10 runs
Benchmark 2: migrate files:reftable (refcount = 100000, revision = HEAD)
Time (mean ± σ): 86.1 ms ± 1.9 ms [User: 67.9 ms, System: 16.0 ms]
Range (min … max): 82.9 ms … 90.9 ms 29 runs
Summary
migrate files:reftable (refcount = 100000, revision = HEAD) ran
5.48 ± 0.15 times faster than migrate files:reftable (refcount = 100000, revision = HEAD~)
And from "reftable" to "files":
Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 452.7 ms ± 3.4 ms [User: 209.9 ms, System: 235.4 ms]
Range (min … max): 445.9 ms … 457.5 ms 10 runs
Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
Time (mean ± σ): 95.2 ms ± 2.2 ms [User: 73.6 ms, System: 20.6 ms]
Range (min … max): 91.7 ms … 100.8 ms 28 runs
Summary
migrate reftable:files (refcount = 100000, revision = HEAD) ran
4.76 ± 0.11 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 37 +++++++++++++++++++++----------------
refs.h | 5 ++++-
refs/files-backend.c | 13 ++++++-------
refs/reftable-backend.c | 6 ++++--
t/helper/test-ref-store.c | 2 +-
5 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/refs.c b/refs.c
index 0f10c565bbb4e0d91210c52a3221a224e4264d28..d690eb19b3fd7083a5309deb98738547e4f48040 100644
--- a/refs.c
+++ b/refs.c
@@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
+ int initial_transaction,
struct strbuf *err)
{
const char *slash;
@@ -2332,8 +2333,6 @@ int refs_verify_refname_available(struct ref_store *refs,
struct strbuf referent = STRBUF_INIT;
struct object_id oid;
unsigned int type;
- struct ref_iterator *iter;
- int ok;
int ret = -1;
/*
@@ -2363,7 +2362,8 @@ int refs_verify_refname_available(struct ref_store *refs,
if (skip && string_list_has_string(skip, dirname.buf))
continue;
- if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
+ if (!initial_transaction &&
+ !refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
&type, &ignore_errno)) {
strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
dirname.buf, refname);
@@ -2388,21 +2388,26 @@ int refs_verify_refname_available(struct ref_store *refs,
strbuf_addstr(&dirname, refname + dirname.len);
strbuf_addch(&dirname, '/');
- iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
- DO_FOR_EACH_INCLUDE_BROKEN);
- while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
- if (skip &&
- string_list_has_string(skip, iter->refname))
- continue;
+ if (!initial_transaction) {
+ struct ref_iterator *iter;
+ int ok;
- strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
- iter->refname, refname);
- ref_iterator_abort(iter);
- goto cleanup;
- }
+ iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
+ DO_FOR_EACH_INCLUDE_BROKEN);
+ while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
+ if (skip &&
+ string_list_has_string(skip, iter->refname))
+ continue;
- if (ok != ITER_DONE)
- BUG("error while iterating over references");
+ strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
+ iter->refname, refname);
+ ref_iterator_abort(iter);
+ goto cleanup;
+ }
+
+ if (ok != ITER_DONE)
+ BUG("error while iterating over references");
+ }
extra_refname = find_descendant_ref(dirname.buf, extras, skip);
if (extra_refname)
diff --git a/refs.h b/refs.h
index 024a370554e013d66febee635e4c0415ba061fe6..980bd20cf24e15144aeff991eeba8b27a936d386 100644
--- a/refs.h
+++ b/refs.h
@@ -101,13 +101,16 @@ int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname,
* both "foo" and with "foo/bar/baz" but not with "foo/bar" or
* "foo/barbados".
*
+ * If `initial_transaction` is truish, then all collision checks with
+ * preexisting refs are skipped.
+ *
* extras and skip must be sorted.
*/
-
int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
+ int initial_transaction,
struct strbuf *err);
int refs_ref_exists(struct ref_store *refs, const char *refname);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 116d4259697b20583cb2db34ed47025e8781cd42..d27806c02c272f8bddc789b509e3c3c7af4f75aa 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -706,7 +706,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
* reason to expect this error to be transitory.
*/
if (refs_verify_refname_available(&refs->base, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
if (mustexist) {
/*
* To the user the relevant error is
@@ -813,7 +813,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
REMOVE_DIR_EMPTY_ONLY)) {
if (refs_verify_refname_available(
&refs->base, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
/*
* The error message set by
* verify_refname_available() is OK.
@@ -850,7 +850,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
*/
if (refs_verify_refname_available(
refs->packed_ref_store, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
ret = TRANSACTION_NAME_CONFLICT;
goto error_return;
}
@@ -1159,7 +1159,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
*/
if (is_null_oid(&lock->old_oid) &&
refs_verify_refname_available(refs->packed_ref_store, refname,
- NULL, NULL, err))
+ NULL, NULL, 0, err))
goto error_return;
lock->ref_name = xstrdup(refname);
@@ -1538,7 +1538,7 @@ static int refs_rename_ref_available(struct ref_store *refs,
string_list_insert(&skip, old_refname);
ok = !refs_verify_refname_available(refs, new_refname,
- NULL, &skip, &err);
+ NULL, &skip, 0, &err);
if (!ok)
error("%s", err.buf);
@@ -3043,8 +3043,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
BUG("initial ref transaction with old_sha1 set");
if (refs_verify_refname_available(&refs->base, update->refname,
- &affected_refnames, NULL,
- err)) {
+ &affected_refnames, NULL, 1, err)) {
ret = TRANSACTION_NAME_CONFLICT;
goto cleanup;
}
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 8e914afc817f198bed3199630b430e179cabc740..bbc779ab410b41f00759a3a41a76dd708f115c90 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1097,7 +1097,9 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
* at a later point.
*/
ret = refs_verify_refname_available(ref_store, u->refname,
- &affected_refnames, NULL, err);
+ &affected_refnames, NULL,
+ transaction->flags & REF_TRANSACTION_FLAG_INITIAL,
+ err);
if (ret < 0)
goto done;
@@ -1584,7 +1586,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
if (arg->delete_old)
string_list_insert(&skip, arg->oldname);
ret = refs_verify_refname_available(&arg->refs->base, arg->newname,
- NULL, &skip, &errbuf);
+ NULL, &skip, 0, &errbuf);
if (ret < 0) {
error("%s", errbuf.buf);
goto done;
diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c
index 65346dee551ccd781a88786f0c8465f60b286cde..240f6fc29d7f1bb20658deee467bcb46ac3407b2 100644
--- a/t/helper/test-ref-store.c
+++ b/t/helper/test-ref-store.c
@@ -199,7 +199,7 @@ static int cmd_verify_ref(struct ref_store *refs, const char **argv)
struct strbuf err = STRBUF_INIT;
int ret;
- ret = refs_verify_refname_available(refs, refname, NULL, NULL, &err);
+ ret = refs_verify_refname_available(refs, refname, NULL, NULL, 0, &err);
if (err.len)
puts(err.buf);
return ret;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH v2 06/10] refs: skip collision checks in initial transactions
2024-11-20 7:51 ` [PATCH v2 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
@ 2024-11-20 10:21 ` Christian Couder
2024-11-25 5:52 ` Patrick Steinhardt
2024-11-20 10:42 ` Kristoffer Haugsbakk
1 sibling, 1 reply; 54+ messages in thread
From: Christian Couder @ 2024-11-20 10:21 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, karthik nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 8:54 AM Patrick Steinhardt <ps@pks.im> wrote:
> diff --git a/refs.c b/refs.c
> index 0f10c565bbb4e0d91210c52a3221a224e4264d28..d690eb19b3fd7083a5309deb98738547e4f48040 100644
> --- a/refs.c
> +++ b/refs.c
> @@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs,
> const char *refname,
> const struct string_list *extras,
> const struct string_list *skip,
> + int initial_transaction,
Nit: Using 'unsigned int' instead of 'int' might be slightly better as
the type would be the same as "transaction->flags &
REF_TRANSACTION_FLAG_INITIAL" we pass as an argument below. It might
also make it slightly simpler to convert "int initial_transaction" to
"int flags" if we add more flags and need to do that in the future.
(Other functions add an 'int initial_transaction' argument which could
be an 'unsigned int' instead.)
> struct strbuf *err)
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH v2 06/10] refs: skip collision checks in initial transactions
2024-11-20 10:21 ` Christian Couder
@ 2024-11-25 5:52 ` Patrick Steinhardt
0 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 5:52 UTC (permalink / raw)
To: Christian Couder; +Cc: Patrick Steinhardt, git, karthik nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 11:21:44AM +0100, Christian Couder wrote:
> On Wed, Nov 20, 2024 at 8:54 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> > diff --git a/refs.c b/refs.c
> > index 0f10c565bbb4e0d91210c52a3221a224e4264d28..d690eb19b3fd7083a5309deb98738547e4f48040 100644
> > --- a/refs.c
> > +++ b/refs.c
> > @@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs,
> > const char *refname,
> > const struct string_list *extras,
> > const struct string_list *skip,
> > + int initial_transaction,
>
> Nit: Using 'unsigned int' instead of 'int' might be slightly better as
> the type would be the same as "transaction->flags &
> REF_TRANSACTION_FLAG_INITIAL" we pass as an argument below. It might
> also make it slightly simpler to convert "int initial_transaction" to
> "int flags" if we add more flags and need to do that in the future.
> (Other functions add an 'int initial_transaction' argument which could
> be an 'unsigned int' instead.)
Makes sense, will adapt.
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH v2 06/10] refs: skip collision checks in initial transactions
2024-11-20 7:51 ` [PATCH v2 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
2024-11-20 10:21 ` Christian Couder
@ 2024-11-20 10:42 ` Kristoffer Haugsbakk
2024-11-25 5:52 ` Patrick Steinhardt
1 sibling, 1 reply; 54+ messages in thread
From: Kristoffer Haugsbakk @ 2024-11-20 10:42 UTC (permalink / raw)
To: Patrick Steinhardt, git; +Cc: Karthik Nayak, Junio C Hamano
Hi
On Wed, Nov 20, 2024, at 08:51, Patrick Steinhardt wrote:
> While we generally cannot avoid the first check, the second check is
> superfluous in cases where the transaction is an initial one in an
> otherwise empty ref store. The check results in multiple ref reads as
> well as the creation of a ref iterator for every ref we're checking,
> which adds up quite fast when performing the check for many refs.
>
> Introduce a new flag that allows us to skip this check and wire it up in
> such that the backends pass it when running an initial transaction. This
Missing word? “wire it up in such that”.
Maybe: “wire it up so that”.
> leads to significant speedups when migrating ref storage backends. From
> "files" to "reftable":
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH v2 06/10] refs: skip collision checks in initial transactions
2024-11-20 10:42 ` Kristoffer Haugsbakk
@ 2024-11-25 5:52 ` Patrick Steinhardt
0 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 5:52 UTC (permalink / raw)
To: Kristoffer Haugsbakk
Cc: Patrick Steinhardt, git, Karthik Nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 11:42:29AM +0100, Kristoffer Haugsbakk wrote:
> Hi
>
> On Wed, Nov 20, 2024, at 08:51, Patrick Steinhardt wrote:
> > While we generally cannot avoid the first check, the second check is
> > superfluous in cases where the transaction is an initial one in an
> > otherwise empty ref store. The check results in multiple ref reads as
> > well as the creation of a ref iterator for every ref we're checking,
> > which adds up quite fast when performing the check for many refs.
> >
> > Introduce a new flag that allows us to skip this check and wire it up in
> > such that the backends pass it when running an initial transaction. This
>
> Missing word? “wire it up in such that”.
>
> Maybe: “wire it up so that”.
Ah, indeed. Will fix. Thanks!
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v2 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (5 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
` (2 subsequent siblings)
9 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
When the `REF_SKIP_CREATE_REFLOG` flag is set we skip the creation of
the reflog entry, but we still normalize the reflog message when we
queue the update. This is a waste of resources as the normalized message
will never get used in the first place.
Fix this issue by skipping the normalization in case the flag is set.
This leads to a surprisingly large speedup when migrating from the
"files" to the "reftable" backend:
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 878.5 ms ± 14.9 ms [User: 726.5 ms, System: 139.2 ms]
Range (min … max): 858.4 ms … 941.3 ms 50 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 831.1 ms ± 10.5 ms [User: 694.1 ms, System: 126.3 ms]
Range (min … max): 812.4 ms … 851.4 ms 50 runs
Summary
migrate files:reftable (refcount = 1000000, revision = HEAD) ran
1.06 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)
And an ever larger speedup when migrating the other way round:
Benchmark 1: migrate reftable:files (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 923.6 ms ± 11.6 ms [User: 705.5 ms, System: 208.1 ms]
Range (min … max): 905.3 ms … 946.5 ms 50 runs
Benchmark 2: migrate reftable:files (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 818.5 ms ± 9.0 ms [User: 627.6 ms, System: 180.6 ms]
Range (min … max): 802.2 ms … 842.9 ms 50 runs
Summary
migrate reftable:files (refcount = 1000000, revision = HEAD) ran
1.13 ± 0.02 times faster than migrate reftable:files (refcount = 1000000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index d690eb19b3fd7083a5309deb98738547e4f48040..65eea3eb7734d03f09a22e8edfe5074d532398ff 100644
--- a/refs.c
+++ b/refs.c
@@ -1188,8 +1188,9 @@ struct ref_update *ref_transaction_add_update(
oidcpy(&update->new_oid, new_oid);
if ((flags & REF_HAVE_OLD) && old_oid)
oidcpy(&update->old_oid, old_oid);
+ if (!(flags & REF_SKIP_CREATE_REFLOG))
+ update->msg = normalize_reflog_message(msg);
- update->msg = normalize_reflog_message(msg);
return update;
}
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v2 08/10] reftable/writer: optimize allocations by using a scratch buffer
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (6 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 10:21 ` Christian Couder
2024-11-20 7:51 ` [PATCH v2 09/10] reftable/block: rename `block_writer::buf` variable Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
9 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Both `writer_add_record()` and `reftable_writer_add_ref()` get executed
for every single ref record we're adding to the reftable writer. And as
both functions use a local buffer to write data, the allocations we have
to do here add up during larger transactions.
Refactor the code to use a scratch buffer part of the `reftable_writer`
itself such that we can reuse it. This signifcantly reduces the number
of allocations during large transactions, e.g. when migrating refs from
the "files" backend to the "reftable" backend. Before this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 5,032,171 allocs, 5,032,122 frees, 418,792,092 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated
This also translate into a small speedup:
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 827.2 ms ± 16.5 ms [User: 689.4 ms, System: 124.9 ms]
Range (min … max): 809.0 ms … 924.7 ms 50 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 813.6 ms ± 11.6 ms [User: 679.0 ms, System: 123.4 ms]
Range (min … max): 786.7 ms … 833.5 ms 50 runs
Summary
migrate files:reftable (refcount = 1000000, revision = HEAD) ran
1.02 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/writer.c | 23 +++++++++++------------
reftable/writer.h | 1 +
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/reftable/writer.c b/reftable/writer.c
index fd136794d5a27b33b5017f36fbd6b095ab8dac5b..6501376ce826469556a7dfa3c39258847300ae66 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -148,6 +148,7 @@ int reftable_writer_new(struct reftable_writer **out,
reftable_buf_init(&wp->block_writer_data.last_key);
reftable_buf_init(&wp->last_key);
+ reftable_buf_init(&wp->buf);
REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size);
if (!wp->block) {
reftable_free(wp);
@@ -180,6 +181,7 @@ static void writer_release(struct reftable_writer *w)
w->block_writer = NULL;
writer_clear_index(w);
reftable_buf_release(&w->last_key);
+ reftable_buf_release(&w->buf);
}
}
@@ -249,20 +251,19 @@ static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *has
static int writer_add_record(struct reftable_writer *w,
struct reftable_record *rec)
{
- struct reftable_buf key = REFTABLE_BUF_INIT;
int err;
- err = reftable_record_key(rec, &key);
+ err = reftable_record_key(rec, &w->buf);
if (err < 0)
goto done;
- if (reftable_buf_cmp(&w->last_key, &key) >= 0) {
+ if (reftable_buf_cmp(&w->last_key, &w->buf) >= 0) {
err = REFTABLE_API_ERROR;
goto done;
}
reftable_buf_reset(&w->last_key);
- err = reftable_buf_add(&w->last_key, key.buf, key.len);
+ err = reftable_buf_add(&w->last_key, w->buf.buf, w->buf.len);
if (err < 0)
goto done;
@@ -312,7 +313,6 @@ static int writer_add_record(struct reftable_writer *w,
}
done:
- reftable_buf_release(&key);
return err;
}
@@ -325,7 +325,6 @@ int reftable_writer_add_ref(struct reftable_writer *w,
.ref = *ref
},
};
- struct reftable_buf buf = REFTABLE_BUF_INIT;
int err;
if (!ref->refname ||
@@ -340,24 +339,25 @@ int reftable_writer_add_ref(struct reftable_writer *w,
goto out;
if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) {
- err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref),
+ reftable_buf_reset(&w->buf);
+ err = reftable_buf_add(&w->buf, (char *)reftable_ref_record_val1(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
+ err = writer_index_hash(w, &w->buf);
if (err < 0)
goto out;
}
if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) {
- reftable_buf_reset(&buf);
- err = reftable_buf_add(&buf, reftable_ref_record_val2(ref),
+ reftable_buf_reset(&w->buf);
+ err = reftable_buf_add(&w->buf, reftable_ref_record_val2(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
+ err = writer_index_hash(w, &w->buf);
if (err < 0)
goto out;
}
@@ -365,7 +365,6 @@ int reftable_writer_add_ref(struct reftable_writer *w,
err = 0;
out:
- reftable_buf_release(&buf);
return err;
}
diff --git a/reftable/writer.h b/reftable/writer.h
index e8a6fbb78543e6e56920a2999601db0db9fe4d97..421a897dccd85ad0532860ff1b4f38b2813d438d 100644
--- a/reftable/writer.h
+++ b/reftable/writer.h
@@ -20,6 +20,7 @@ struct reftable_writer {
void *write_arg;
int pending_padding;
struct reftable_buf last_key;
+ struct reftable_buf buf;
/* offset of next block to write. */
uint64_t next;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH v2 08/10] reftable/writer: optimize allocations by using a scratch buffer
2024-11-20 7:51 ` [PATCH v2 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
@ 2024-11-20 10:21 ` Christian Couder
2024-11-25 5:52 ` Patrick Steinhardt
0 siblings, 1 reply; 54+ messages in thread
From: Christian Couder @ 2024-11-20 10:21 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, karthik nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 9:07 AM Patrick Steinhardt <ps@pks.im> wrote:
> diff --git a/reftable/writer.h b/reftable/writer.h
> index e8a6fbb78543e6e56920a2999601db0db9fe4d97..421a897dccd85ad0532860ff1b4f38b2813d438d 100644
> --- a/reftable/writer.h
> +++ b/reftable/writer.h
> @@ -20,6 +20,7 @@ struct reftable_writer {
> void *write_arg;
> int pending_padding;
> struct reftable_buf last_key;
> + struct reftable_buf buf;
Nit: It would be nice to add a comment, so that readers don't have to
look at .c files, or the commit message, to find what this field is
used for.
> /* offset of next block to write. */
> uint64_t next;
^ permalink raw reply [flat|nested] 54+ messages in thread
* Re: [PATCH v2 08/10] reftable/writer: optimize allocations by using a scratch buffer
2024-11-20 10:21 ` Christian Couder
@ 2024-11-25 5:52 ` Patrick Steinhardt
0 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 5:52 UTC (permalink / raw)
To: Christian Couder; +Cc: Patrick Steinhardt, git, karthik nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 11:21:54AM +0100, Christian Couder wrote:
> On Wed, Nov 20, 2024 at 9:07 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> > diff --git a/reftable/writer.h b/reftable/writer.h
> > index e8a6fbb78543e6e56920a2999601db0db9fe4d97..421a897dccd85ad0532860ff1b4f38b2813d438d 100644
> > --- a/reftable/writer.h
> > +++ b/reftable/writer.h
> > @@ -20,6 +20,7 @@ struct reftable_writer {
> > void *write_arg;
> > int pending_padding;
> > struct reftable_buf last_key;
> > + struct reftable_buf buf;
>
> Nit: It would be nice to add a comment, so that readers don't have to
> look at .c files, or the commit message, to find what this field is
> used for.
Fair enough. I'll also rename it to `scratch` while at it to clarify its
intent a bit. Other parts of the reftable library already use that name.
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v2 09/10] reftable/block: rename `block_writer::buf` variable
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (7 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 7:51 ` [PATCH v2 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
9 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
Adapt the name of the `block_writer::buf` variable to instead be called
`block`. This aligns it with the existing `block_len` variable, which
tracks the length of this buffer, and is generally a bit more tied to
the actual context where this variable gets used.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block.c | 20 ++++++++++----------
reftable/block.h | 8 ++++----
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/reftable/block.c b/reftable/block.c
index f5b432566a6b9f171a1f1374b6c892ab0696d744..3fa36c002a0c1852790780e74a6e055161f857d9 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -70,14 +70,14 @@ static int block_writer_register_restart(struct block_writer *w, int n,
return 0;
}
-int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
uint32_t block_size, uint32_t header_off, int hash_size)
{
- bw->buf = buf;
+ bw->block = block;
bw->hash_size = hash_size;
bw->block_size = block_size;
bw->header_off = header_off;
- bw->buf[header_off] = typ;
+ bw->block[header_off] = typ;
bw->next = header_off + 4;
bw->restart_interval = 16;
bw->entries = 0;
@@ -95,7 +95,7 @@ int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
uint8_t block_writer_type(struct block_writer *bw)
{
- return bw->buf[bw->header_off];
+ return bw->block[bw->header_off];
}
/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
@@ -107,7 +107,7 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
struct reftable_buf last =
w->entries % w->restart_interval == 0 ? empty : w->last_key;
struct string_view out = {
- .buf = w->buf + w->next,
+ .buf = w->block + w->next,
.len = w->block_size - w->next,
};
@@ -153,13 +153,13 @@ int block_writer_finish(struct block_writer *w)
{
int i;
for (i = 0; i < w->restart_len; i++) {
- put_be24(w->buf + w->next, w->restarts[i]);
+ put_be24(w->block + w->next, w->restarts[i]);
w->next += 3;
}
- put_be16(w->buf + w->next, w->restart_len);
+ put_be16(w->block + w->next, w->restart_len);
w->next += 2;
- put_be24(w->buf + 1 + w->header_off, w->next);
+ put_be24(w->block + 1 + w->header_off, w->next);
/*
* Log records are stored zlib-compressed. Note that the compression
@@ -188,7 +188,7 @@ int block_writer_finish(struct block_writer *w)
w->zstream->next_out = w->compressed;
w->zstream->avail_out = compressed_len;
- w->zstream->next_in = w->buf + block_header_skip;
+ w->zstream->next_in = w->block + block_header_skip;
w->zstream->avail_in = src_len;
/*
@@ -206,7 +206,7 @@ int block_writer_finish(struct block_writer *w)
* adjust the `next` pointer to point right after the
* compressed data.
*/
- memcpy(w->buf + block_header_skip, w->compressed,
+ memcpy(w->block + block_header_skip, w->compressed,
w->zstream->total_out);
w->next = w->zstream->total_out + block_header_skip;
}
diff --git a/reftable/block.h b/reftable/block.h
index 9a3effa513420039ee3f2834339c5082f64339d0..b3f837d612a8f0fbe98430b04e2dddaa975a15ab 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -22,7 +22,7 @@ struct block_writer {
unsigned char *compressed;
size_t compressed_cap;
- uint8_t *buf;
+ uint8_t *block;
uint32_t block_size;
/* Offset of the global header. Nonzero in the first block only. */
@@ -43,9 +43,9 @@ struct block_writer {
};
/*
- * initializes the blockwriter to write `typ` entries, using `buf` as temporary
- * storage. `buf` is not owned by the block_writer. */
-int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
+ * initializes the blockwriter to write `typ` entries, using `block` as temporary
+ * storage. `block` is not owned by the block_writer. */
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
uint32_t block_size, uint32_t header_off, int hash_size);
/* returns the block type (eg. 'r' for ref records. */
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v2 10/10] reftable/block: optimize allocations by using scratch buffer
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
` (8 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 09/10] reftable/block: rename `block_writer::buf` variable Patrick Steinhardt
@ 2024-11-20 7:51 ` Patrick Steinhardt
2024-11-20 10:22 ` Christian Couder
9 siblings, 1 reply; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-20 7:51 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano
The block writer needs to compute the key for every record that one adds
to the writer. The buffer for this key is stored on the stack and thus
reallocated on every call to `block_writer_add()`, which is inefficient.
Refactor the code so that we store the buffer in the `block_writer`
struct itself so that we can reuse it. This reduces the number of
allocations when writing many refs, e.g. when migrating one million refs
from the "files" backend to the "reftable backend. Before this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 2,013,250 allocs, 2,013,201 frees, 347,543,583 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block.c | 13 +++++--------
reftable/block.h | 1 +
2 files changed, 6 insertions(+), 8 deletions(-)
diff --git a/reftable/block.c b/reftable/block.c
index 3fa36c002a0c1852790780e74a6e055161f857d9..1aa7e8cd3cbf0980f6bc20262be89e755d0a4b4b 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -110,24 +110,21 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
.buf = w->block + w->next,
.len = w->block_size - w->next,
};
-
struct string_view start = out;
-
int is_restart = 0;
- struct reftable_buf key = REFTABLE_BUF_INIT;
int n = 0;
int err;
- err = reftable_record_key(rec, &key);
+ err = reftable_record_key(rec, &w->buf);
if (err < 0)
goto done;
- if (!key.len) {
+ if (!w->buf.len) {
err = REFTABLE_API_ERROR;
goto done;
}
- n = reftable_encode_key(&is_restart, out, last, key,
+ n = reftable_encode_key(&is_restart, out, last, w->buf,
reftable_record_val_type(rec));
if (n < 0) {
err = -1;
@@ -143,9 +140,8 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
string_view_consume(&out, n);
err = block_writer_register_restart(w, start.len - out.len, is_restart,
- &key);
+ &w->buf);
done:
- reftable_buf_release(&key);
return err;
}
@@ -569,6 +565,7 @@ void block_writer_release(struct block_writer *bw)
REFTABLE_FREE_AND_NULL(bw->zstream);
REFTABLE_FREE_AND_NULL(bw->restarts);
REFTABLE_FREE_AND_NULL(bw->compressed);
+ reftable_buf_release(&bw->buf);
reftable_buf_release(&bw->last_key);
/* the block is not owned. */
}
diff --git a/reftable/block.h b/reftable/block.h
index b3f837d612a8f0fbe98430b04e2dddaa975a15ab..d76f00553073c10185e716e71e2f415ce5dcf7e2 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -39,6 +39,7 @@ struct block_writer {
uint32_t restart_cap;
struct reftable_buf last_key;
+ struct reftable_buf buf;
int entries;
};
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH v2 10/10] reftable/block: optimize allocations by using scratch buffer
2024-11-20 7:51 ` [PATCH v2 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
@ 2024-11-20 10:22 ` Christian Couder
0 siblings, 0 replies; 54+ messages in thread
From: Christian Couder @ 2024-11-20 10:22 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, karthik nayak, Junio C Hamano
On Wed, Nov 20, 2024 at 8:56 AM Patrick Steinhardt <ps@pks.im> wrote:
> diff --git a/reftable/block.h b/reftable/block.h
> index b3f837d612a8f0fbe98430b04e2dddaa975a15ab..d76f00553073c10185e716e71e2f415ce5dcf7e2 100644
> --- a/reftable/block.h
> +++ b/reftable/block.h
> @@ -39,6 +39,7 @@ struct block_writer {
> uint32_t restart_cap;
>
> struct reftable_buf last_key;
> + struct reftable_buf buf;
Nit (similar as for patch 8/10): It would be nice to add a comment, so
that readers don't have to look at .c files, or the commit message, to
find what this field is used for.
> int entries;
> };
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v3 00/10] refs: optimize ref format migrations
2024-11-08 9:34 [PATCH 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (11 preceding siblings ...)
2024-11-20 7:51 ` [PATCH v2 " Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
` (10 more replies)
12 siblings, 11 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Hi,
I have recently learned that ref format migrations can take a
significant amount of time in the order of minutes when migrating
millions of refs. This is probably not entirely surprising: the initial
focus for the logic to migrate ref backends was mostly focussed on
getting the basic feature working, and I didn't yet invest any time into
optimizing the code path at all. But I was still mildly surprised that
the migration of a couple million refs was taking minutes to finish.
This patch series thus optimizes how we migrate ref formats. This is
mostly done by expanding upon the "initial transaction" semantics that
we already use for git-clone(1). These semantics allow us to assume that
the ref backend is completely empty and that there are no concurrent
writers, and thus we are free to perform certain optimizations that
wouldn't have otherwise been possible. On the one hand this allows us to
drop needless collision checks. On the other hand, it also allows us to
write regular refs directly into the "packed-refs" file when migrating
from the "reftable" backend to the "files" backend.
This leads to some significant speedups. Migrating 1 million refs from
"files" to "reftable":
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = origin/master)
Time (mean ± σ): 4.580 s ± 0.062 s [User: 1.818 s, System: 2.746 s]
Range (min … max): 4.534 s … 4.743 s 10 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = pks-refs-optimize-migrations)
Time (mean ± σ): 767.7 ms ± 9.5 ms [User: 629.2 ms, System: 126.1 ms]
Range (min … max): 755.8 ms … 786.9 ms 10 runs
Summary
migrate files:reftable (refcount = 1000000, revision = pks-refs-optimize-migrations) ran
5.97 ± 0.11 times faster than migrate files:reftable (refcount = 1000000, revision = origin/master)
And migrating from "reftable" to "files:
Benchmark 1: migrate reftable:files (refcount = 1000000, revision = origin/master)
Time (mean ± σ): 35.409 s ± 0.302 s [User: 5.061 s, System: 29.244 s]
Range (min … max): 35.055 s … 35.898 s 10 runs
Benchmark 2: migrate reftable:files (refcount = 1000000, revision = pks-refs-optimize-migrations)
Time (mean ± σ): 855.9 ms ± 61.5 ms [User: 646.7 ms, System: 187.1 ms]
Range (min … max): 830.0 ms … 1030.3 ms 10 runs
Summary
migrate reftable:files (refcount = 1000000, revision = pks-refs-optimize-migrations) ran
41.37 ± 2.99 times faster than migrate reftable:files (refcount = 1000000, revision = origin/master)
Changes in v2:
- Mention in the first commit message that the introduced flag will be
used in a subsequent patch.
- Link to v1: https://lore.kernel.org/r/20241108-pks-refs-optimize-migrations-v1-0-7fd37fa80e35@pks.im
Changes in v3:
- Mention that we store the ref transaction flag for access by the
backend in the first commit message.
- Fix a missing word in another commit message.
- Use `unsigned int` to pass `initial_transaction` flag.
- Rename the scratch buffers and provide a comment for why they exist.
- Link to v2: https://lore.kernel.org/r/20241120-pks-refs-optimize-migrations-v2-0-a233374b7452@pks.im
Thanks!
Patrick
---
Patrick Steinhardt (10):
refs: allow passing flags when setting up a transaction
refs/files: move logic to commit initial transaction
refs: introduce "initial" transaction flag
refs/files: support symbolic and root refs in initial transaction
refs: use "initial" transaction semantics to migrate refs
refs: skip collision checks in initial transactions
refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
reftable/writer: optimize allocations by using a scratch buffer
reftable/block: rename `block_writer::buf` variable
reftable/block: optimize allocations by using scratch buffer
branch.c | 2 +-
builtin/clone.c | 4 +-
builtin/fast-import.c | 4 +-
builtin/fetch.c | 4 +-
builtin/receive-pack.c | 4 +-
builtin/replace.c | 2 +-
builtin/tag.c | 2 +-
builtin/update-ref.c | 4 +-
refs.c | 70 ++++++-------
refs.h | 45 +++++----
refs/debug.c | 13 ---
refs/files-backend.c | 244 +++++++++++++++++++++++++---------------------
refs/packed-backend.c | 8 --
refs/refs-internal.h | 2 +-
refs/reftable-backend.c | 14 +--
reftable/block.c | 33 +++----
reftable/block.h | 10 +-
reftable/writer.c | 23 +++--
reftable/writer.h | 2 +
sequencer.c | 6 +-
t/helper/test-ref-store.c | 2 +-
t/t1460-refs-migrate.sh | 2 +-
walker.c | 2 +-
23 files changed, 249 insertions(+), 253 deletions(-)
Range-diff versus v2:
1: 74d23ad64d ! 1: 64ebf2d3c6 refs: allow passing flags when setting up a transaction
@@ Metadata
## Commit message ##
refs: allow passing flags when setting up a transaction
- Allow passing flags when setting up a transaction such that the
- behaviour of the transaction itself can be altered. This functionality
- will be used in a subsequent patch.
+ Allow passing flags when creating a new transaction. These flagas are
+ stored in the `struct ref_transaction` and can be used by the respective
+ backends to alter their behaviour depending on the flag's value. This
+ functionality will be used in a subsequent patch.
Adapt callers accordingly.
2: b7d50b9270 = 2: 616a5d66de refs/files: move logic to commit initial transaction
3: 2a0fff3748 = 3: 8d4b867641 refs: introduce "initial" transaction flag
4: 3904f337f6 = 4: 70961f6295 refs/files: support symbolic and root refs in initial transaction
5: 131161d467 = 5: ccae50e766 refs: use "initial" transaction semantics to migrate refs
6: a443916bbd ! 6: ec5de7e1a2 refs: skip collision checks in initial transactions
@@ Commit message
well as the creation of a ref iterator for every ref we're checking,
which adds up quite fast when performing the check for many refs.
- Introduce a new flag that allows us to skip this check and wire it up in
- such that the backends pass it when running an initial transaction. This
+ Introduce a new flag that allows us to skip this check and wire it up so
+ that the backends pass it when running an initial transaction. This
leads to significant speedups when migrating ref storage backends. From
"files" to "reftable":
@@ refs.c: int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
-+ int initial_transaction,
++ unsigned int initial_transaction,
struct strbuf *err)
{
const char *slash;
@@ refs.h: int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refn
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
-+ int initial_transaction,
++ unsigned int initial_transaction,
struct strbuf *err);
int refs_ref_exists(struct ref_store *refs, const char *refname);
7: 4bd95e5661 = 7: f60f692ea0 refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
8: de1573c1f7 ! 8: e8881d67db reftable/writer: optimize allocations by using a scratch buffer
@@ reftable/writer.c: int reftable_writer_new(struct reftable_writer **out,
reftable_buf_init(&wp->block_writer_data.last_key);
reftable_buf_init(&wp->last_key);
-+ reftable_buf_init(&wp->buf);
++ reftable_buf_init(&wp->scratch);
REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size);
if (!wp->block) {
reftable_free(wp);
@@ reftable/writer.c: static void writer_release(struct reftable_writer *w)
w->block_writer = NULL;
writer_clear_index(w);
reftable_buf_release(&w->last_key);
-+ reftable_buf_release(&w->buf);
++ reftable_buf_release(&w->scratch);
}
}
@@ reftable/writer.c: static int writer_index_hash(struct reftable_writer *w, struc
int err;
- err = reftable_record_key(rec, &key);
-+ err = reftable_record_key(rec, &w->buf);
++ err = reftable_record_key(rec, &w->scratch);
if (err < 0)
goto done;
- if (reftable_buf_cmp(&w->last_key, &key) >= 0) {
-+ if (reftable_buf_cmp(&w->last_key, &w->buf) >= 0) {
++ if (reftable_buf_cmp(&w->last_key, &w->scratch) >= 0) {
err = REFTABLE_API_ERROR;
goto done;
}
reftable_buf_reset(&w->last_key);
- err = reftable_buf_add(&w->last_key, key.buf, key.len);
-+ err = reftable_buf_add(&w->last_key, w->buf.buf, w->buf.len);
++ err = reftable_buf_add(&w->last_key, w->scratch.buf, w->scratch.len);
if (err < 0)
goto done;
@@ reftable/writer.c: int reftable_writer_add_ref(struct reftable_writer *w,
if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) {
- err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref),
-+ reftable_buf_reset(&w->buf);
-+ err = reftable_buf_add(&w->buf, (char *)reftable_ref_record_val1(ref),
++ reftable_buf_reset(&w->scratch);
++ err = reftable_buf_add(&w->scratch, (char *)reftable_ref_record_val1(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
-+ err = writer_index_hash(w, &w->buf);
++ err = writer_index_hash(w, &w->scratch);
if (err < 0)
goto out;
}
@@ reftable/writer.c: int reftable_writer_add_ref(struct reftable_writer *w,
if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) {
- reftable_buf_reset(&buf);
- err = reftable_buf_add(&buf, reftable_ref_record_val2(ref),
-+ reftable_buf_reset(&w->buf);
-+ err = reftable_buf_add(&w->buf, reftable_ref_record_val2(ref),
++ reftable_buf_reset(&w->scratch);
++ err = reftable_buf_add(&w->scratch, reftable_ref_record_val2(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
-+ err = writer_index_hash(w, &w->buf);
++ err = writer_index_hash(w, &w->scratch);
if (err < 0)
goto out;
}
@@ reftable/writer.h: struct reftable_writer {
void *write_arg;
int pending_padding;
struct reftable_buf last_key;
-+ struct reftable_buf buf;
++ /* Scratch buffer used to avoid allocations. */
++ struct reftable_buf scratch;
/* offset of next block to write. */
uint64_t next;
9: 903fb084d5 = 9: 1127729311 reftable/block: rename `block_writer::buf` variable
10: 27692b8a00 ! 10: 7b8ddad673 reftable/block: optimize allocations by using scratch buffer
@@ reftable/block.c: int block_writer_add(struct block_writer *w, struct reftable_r
int err;
- err = reftable_record_key(rec, &key);
-+ err = reftable_record_key(rec, &w->buf);
++ err = reftable_record_key(rec, &w->scratch);
if (err < 0)
goto done;
- if (!key.len) {
-+ if (!w->buf.len) {
++ if (!w->scratch.len) {
err = REFTABLE_API_ERROR;
goto done;
}
- n = reftable_encode_key(&is_restart, out, last, key,
-+ n = reftable_encode_key(&is_restart, out, last, w->buf,
++ n = reftable_encode_key(&is_restart, out, last, w->scratch,
reftable_record_val_type(rec));
if (n < 0) {
err = -1;
@@ reftable/block.c: int block_writer_add(struct block_writer *w, struct reftable_r
err = block_writer_register_restart(w, start.len - out.len, is_restart,
- &key);
-+ &w->buf);
++ &w->scratch);
done:
- reftable_buf_release(&key);
return err;
@@ reftable/block.c: void block_writer_release(struct block_writer *bw)
REFTABLE_FREE_AND_NULL(bw->zstream);
REFTABLE_FREE_AND_NULL(bw->restarts);
REFTABLE_FREE_AND_NULL(bw->compressed);
-+ reftable_buf_release(&bw->buf);
++ reftable_buf_release(&bw->scratch);
reftable_buf_release(&bw->last_key);
/* the block is not owned. */
}
@@ reftable/block.h: struct block_writer {
uint32_t restart_cap;
struct reftable_buf last_key;
-+ struct reftable_buf buf;
++ /* Scratch buffer used to avoid allocations. */
++ struct reftable_buf scratch;
int entries;
};
---
base-commit: facbe4f633e4ad31e641f64617bc88074c659959
change-id: 20241108-pks-refs-optimize-migrations-6d0ceee4abb7
^ permalink raw reply [flat|nested] 54+ messages in thread
* [PATCH v3 01/10] refs: allow passing flags when setting up a transaction
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 02/10] refs/files: move logic to commit initial transaction Patrick Steinhardt
` (9 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Allow passing flags when creating a new transaction. These flagas are
stored in the `struct ref_transaction` and can be used by the respective
backends to alter their behaviour depending on the flag's value. This
functionality will be used in a subsequent patch.
Adapt callers accordingly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
branch.c | 2 +-
builtin/clone.c | 2 +-
builtin/fast-import.c | 4 ++--
builtin/fetch.c | 4 ++--
builtin/receive-pack.c | 4 ++--
builtin/replace.c | 2 +-
builtin/tag.c | 2 +-
builtin/update-ref.c | 4 ++--
refs.c | 12 +++++++-----
refs.h | 3 ++-
refs/files-backend.c | 11 +++++++----
refs/refs-internal.h | 1 +
sequencer.c | 6 +++---
walker.c | 2 +-
14 files changed, 33 insertions(+), 26 deletions(-)
diff --git a/branch.c b/branch.c
index 44977ad0aadbd40c878a0475ef2df2a20798936b..ebaa870c018747358255d3f150d136f0df447d5d 100644
--- a/branch.c
+++ b/branch.c
@@ -627,7 +627,7 @@ void create_branch(struct repository *r,
else
msg = xstrfmt("branch: Created from %s", start_name);
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf,
&oid, forcing ? NULL : null_oid(),
diff --git a/builtin/clone.c b/builtin/clone.c
index 59fcb317a68a77eee3ca96a60720c556e044c369..d963cc6eb5181e1af5bb29c07c3ee2fa24ad04ca 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs)
struct strbuf err = STRBUF_INIT;
t = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!t)
die("%s", err.buf);
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 76d5c20f141f42da988dddae0e549144d1379031..db82c37a06f05d25e0a8c34cbec055e6f9717191 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -1634,7 +1634,7 @@ static int update_branch(struct branch *b)
}
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, b->name, &b->oid, &old_oid,
NULL, NULL, 0, msg, &err) ||
@@ -1669,7 +1669,7 @@ static void dump_tags(void)
struct ref_transaction *transaction;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
failure |= error("%s", err.buf);
goto cleanup;
diff --git a/builtin/fetch.c b/builtin/fetch.c
index d9027e4dc9245a32a87c47d89f9a29fcfd42534c..68d291c7dd0a4fe805e3b006cc68b185481fba6b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -669,7 +669,7 @@ static int s_update_ref(const char *action,
*/
if (!transaction) {
transaction = our_transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
ret = STORE_REF_ERROR_OTHER;
goto out;
@@ -1671,7 +1671,7 @@ static int do_fetch(struct transport *transport,
if (atomic_fetch) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
retcode = -1;
goto cleanup;
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index ab5b20e39c5038cdf6e6f05f4d66278a9c1ac156..9d2c07f68dafb6fce87cb59abc1cbf27db9aae09 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1849,7 +1849,7 @@ static void execute_commands_non_atomic(struct command *commands,
continue;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
@@ -1878,7 +1878,7 @@ static void execute_commands_atomic(struct command *commands,
const char *reported_error = "atomic push failure";
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
rp_error("%s", err.buf);
strbuf_reset(&err);
diff --git a/builtin/replace.c b/builtin/replace.c
index a44f4e7ea9ff55dbd1f102aca5e5f5a046391328..a4eaadff91f1be107a8e0e25a701d2006ff8cac2 100644
--- a/builtin/replace.c
+++ b/builtin/replace.c
@@ -201,7 +201,7 @@ static int replace_object_oid(const char *object_ref,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf, repl, &prev,
NULL, NULL, 0, NULL, &err) ||
diff --git a/builtin/tag.c b/builtin/tag.c
index 93d10d59157d2ee1b41f90640bd162917f1eb162..3fa0ab111344dda477763a74d26076b6fb71a5ab 100644
--- a/builtin/tag.c
+++ b/builtin/tag.c
@@ -681,7 +681,7 @@ int cmd_tag(int argc,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref.buf, &object, &prev,
NULL, NULL,
diff --git a/builtin/update-ref.c b/builtin/update-ref.c
index 8a98615dc8613a1be3b17c6d688ab9c0208ed003..670e7812d60ea88a20cd1c5cd113643095cb3be1 100644
--- a/builtin/update-ref.c
+++ b/builtin/update-ref.c
@@ -612,7 +612,7 @@ static void update_refs_stdin(void)
int i, j;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction)
die("%s", err.buf);
@@ -680,7 +680,7 @@ static void update_refs_stdin(void)
*/
state = cmd->state;
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction)
die("%s", err.buf);
diff --git a/refs.c b/refs.c
index 5f729ed4124f7fe8fa9df7fd1f1ce951abefc585..9effeb01eb45728514eab0ca92404ea8cf2158d9 100644
--- a/refs.c
+++ b/refs.c
@@ -918,7 +918,7 @@ int refs_delete_ref(struct ref_store *refs, const char *msg,
struct ref_transaction *transaction;
struct strbuf err = STRBUF_INIT;
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction ||
ref_transaction_delete(transaction, refname, old_oid,
NULL, flags, msg, &err) ||
@@ -1116,6 +1116,7 @@ int read_ref_at(struct ref_store *refs, const char *refname,
}
struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
+ unsigned int flags,
struct strbuf *err)
{
struct ref_transaction *tr;
@@ -1123,6 +1124,7 @@ struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
CALLOC_ARRAY(tr, 1);
tr->ref_store = refs;
+ tr->flags = flags;
return tr;
}
@@ -1309,7 +1311,7 @@ int refs_update_ref(struct ref_store *refs, const char *msg,
struct strbuf err = STRBUF_INIT;
int ret = 0;
- t = ref_store_transaction_begin(refs, &err);
+ t = ref_store_transaction_begin(refs, 0, &err);
if (!t ||
ref_transaction_update(t, refname, new_oid, old_oid, NULL, NULL,
flags, msg, &err) ||
@@ -2120,7 +2122,7 @@ int refs_update_symref(struct ref_store *refs, const char *ref,
struct strbuf err = STRBUF_INIT;
int ret = 0;
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction ||
ref_transaction_update(transaction, ref, NULL, NULL,
target, NULL, REF_NO_DEREF,
@@ -2527,7 +2529,7 @@ int refs_delete_refs(struct ref_store *refs, const char *logmsg,
* individual updates can't fail, so we can pack all of the
* updates into a single transaction.
*/
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction) {
ret = error("%s", err.buf);
goto out;
@@ -2833,7 +2835,7 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- transaction = ref_store_transaction_begin(new_refs, errbuf);
+ transaction = ref_store_transaction_begin(new_refs, 0, errbuf);
if (!transaction)
goto done;
diff --git a/refs.h b/refs.h
index 108dfc93b3428db491916ad8a55daea649d83ffd..9821f3e80d900b31c3dede489c2f415d968233d7 100644
--- a/refs.h
+++ b/refs.h
@@ -234,7 +234,7 @@ char *repo_default_branch_name(struct repository *r, int quiet);
* struct strbuf err = STRBUF_INIT;
* int ret = 0;
*
- * transaction = ref_store_transaction_begin(refs, &err);
+ * transaction = ref_store_transaction_begin(refs, 0, &err);
* if (!transaction ||
* ref_transaction_update(...) ||
* ref_transaction_create(...) ||
@@ -584,6 +584,7 @@ enum action_on_err {
* be freed by calling ref_transaction_free().
*/
struct ref_transaction *ref_store_transaction_begin(struct ref_store *refs,
+ unsigned int flags,
struct strbuf *err);
/*
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 0824c0b8a946909791da36ddb4171db4ad98913b..df61057c9f24972b72644407cc95057891338d96 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -1252,7 +1252,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r)
if (check_refname_format(r->name, 0))
return;
- transaction = ref_store_transaction_begin(&refs->base, &err);
+ transaction = ref_store_transaction_begin(&refs->base, 0, &err);
if (!transaction)
goto cleanup;
ref_transaction_add_update(
@@ -1396,7 +1396,8 @@ static int files_pack_refs(struct ref_store *ref_store,
if (!should_pack_refs(refs, opts))
return 0;
- transaction = ref_store_transaction_begin(refs->packed_ref_store, &err);
+ transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ 0, &err);
if (!transaction)
return -1;
@@ -2867,7 +2868,8 @@ static int files_transaction_prepare(struct ref_store *ref_store,
*/
if (!packed_transaction) {
packed_transaction = ref_store_transaction_begin(
- refs->packed_ref_store, err);
+ refs->packed_ref_store,
+ transaction->flags, err);
if (!packed_transaction) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
@@ -3174,7 +3176,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
&affected_refnames))
BUG("initial ref transaction called with existing refs");
- packed_transaction = ref_store_transaction_begin(refs->packed_ref_store, err);
+ packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ transaction->flags, err);
if (!packed_transaction) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index 2313c830d8facaa17b0b4b073df0de958023062a..dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -193,6 +193,7 @@ struct ref_transaction {
size_t nr;
enum ref_transaction_state state;
void *backend_data;
+ unsigned int flags;
};
/*
diff --git a/sequencer.c b/sequencer.c
index 353d804999b88d5fd5dcf1254d80781f20e62f2e..287f4e5e8766327da6f31e87ce0c20f63b302b77 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -662,7 +662,7 @@ static int fast_forward_to(struct repository *r,
strbuf_addf(&sb, "%s: fast-forward", action_name(opts));
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction ||
ref_transaction_update(transaction, "HEAD",
to, unborn && !is_rebase_i(opts) ?
@@ -1297,7 +1297,7 @@ int update_head_with_reflog(const struct commit *old_head,
}
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- err);
+ 0, err);
if (!transaction ||
ref_transaction_update(transaction, "HEAD", new_head,
old_head ? &old_head->object.oid : null_oid(),
@@ -3890,7 +3890,7 @@ static int do_label(struct repository *r, const char *name, int len)
strbuf_addf(&ref_name, "refs/rewritten/%.*s", len, name);
strbuf_addf(&msg, "rebase (label) '%.*s'", len, name);
- transaction = ref_store_transaction_begin(refs, &err);
+ transaction = ref_store_transaction_begin(refs, 0, &err);
if (!transaction) {
error("%s", err.buf);
ret = -1;
diff --git a/walker.c b/walker.c
index 5ea7e5b392b2bd49f249a9acc8d7ce8779357e1b..7cc9dbea46d64d6bd3336025d640f284a6202157 100644
--- a/walker.c
+++ b/walker.c
@@ -290,7 +290,7 @@ int walker_fetch(struct walker *walker, int targets, char **target,
if (write_ref) {
transaction = ref_store_transaction_begin(get_main_ref_store(the_repository),
- &err);
+ 0, &err);
if (!transaction) {
error("%s", err.buf);
goto done;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 02/10] refs/files: move logic to commit initial transaction
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 03/10] refs: introduce "initial" transaction flag Patrick Steinhardt
` (8 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Move the logic to commit initial transactions such that we can start to
call it in `files_transaction_finish()` in a subsequent commit without
requiring a separate function declaration.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs/files-backend.c | 202 +++++++++++++++++++++++++--------------------------
1 file changed, 101 insertions(+), 101 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index df61057c9f24972b72644407cc95057891338d96..f37c805a34167b3749fbe724788180975abdae90 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2975,6 +2975,107 @@ static int parse_and_write_reflog(struct files_ref_store *refs,
return 0;
}
+static int ref_present(const char *refname, const char *referent UNUSED,
+ const struct object_id *oid UNUSED,
+ int flags UNUSED,
+ void *cb_data)
+{
+ struct string_list *affected_refnames = cb_data;
+
+ return string_list_has_string(affected_refnames, refname);
+}
+
+static int files_initial_transaction_commit(struct ref_store *ref_store,
+ struct ref_transaction *transaction,
+ struct strbuf *err)
+{
+ struct files_ref_store *refs =
+ files_downcast(ref_store, REF_STORE_WRITE,
+ "initial_ref_transaction_commit");
+ size_t i;
+ int ret = 0;
+ struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
+ struct ref_transaction *packed_transaction = NULL;
+
+ assert(err);
+
+ if (transaction->state != REF_TRANSACTION_OPEN)
+ BUG("commit called for transaction that is not open");
+
+ /* Fail if a refname appears more than once in the transaction: */
+ for (i = 0; i < transaction->nr; i++)
+ string_list_append(&affected_refnames,
+ transaction->updates[i]->refname);
+ string_list_sort(&affected_refnames);
+ if (ref_update_reject_duplicates(&affected_refnames, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ /*
+ * It's really undefined to call this function in an active
+ * repository or when there are existing references: we are
+ * only locking and changing packed-refs, so (1) any
+ * simultaneous processes might try to change a reference at
+ * the same time we do, and (2) any existing loose versions of
+ * the references that we are setting would have precedence
+ * over our values. But some remote helpers create the remote
+ * "HEAD" and "master" branches before calling this function,
+ * so here we really only check that none of the references
+ * that we are creating already exists.
+ */
+ if (refs_for_each_rawref(&refs->base, ref_present,
+ &affected_refnames))
+ BUG("initial ref transaction called with existing refs");
+
+ packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
+ transaction->flags, err);
+ if (!packed_transaction) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ for (i = 0; i < transaction->nr; i++) {
+ struct ref_update *update = transaction->updates[i];
+
+ if ((update->flags & REF_HAVE_OLD) &&
+ !is_null_oid(&update->old_oid))
+ BUG("initial ref transaction with old_sha1 set");
+ if (refs_verify_refname_available(&refs->base, update->refname,
+ &affected_refnames, NULL,
+ err)) {
+ ret = TRANSACTION_NAME_CONFLICT;
+ goto cleanup;
+ }
+
+ /*
+ * Add a reference creation for this reference to the
+ * packed-refs transaction:
+ */
+ ref_transaction_add_update(packed_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ &update->new_oid, &update->old_oid,
+ NULL, NULL, NULL);
+ }
+
+ if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+
+ if (initial_ref_transaction_commit(packed_transaction, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ }
+
+ packed_refs_unlock(refs->packed_ref_store);
+cleanup:
+ if (packed_transaction)
+ ref_transaction_free(packed_transaction);
+ transaction->state = REF_TRANSACTION_CLOSED;
+ string_list_clear(&affected_refnames, 0);
+ return ret;
+}
+
static int files_transaction_finish(struct ref_store *ref_store,
struct ref_transaction *transaction,
struct strbuf *err)
@@ -3123,107 +3224,6 @@ static int files_transaction_abort(struct ref_store *ref_store,
return 0;
}
-static int ref_present(const char *refname, const char *referent UNUSED,
- const struct object_id *oid UNUSED,
- int flags UNUSED,
- void *cb_data)
-{
- struct string_list *affected_refnames = cb_data;
-
- return string_list_has_string(affected_refnames, refname);
-}
-
-static int files_initial_transaction_commit(struct ref_store *ref_store,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct files_ref_store *refs =
- files_downcast(ref_store, REF_STORE_WRITE,
- "initial_ref_transaction_commit");
- size_t i;
- int ret = 0;
- struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
- struct ref_transaction *packed_transaction = NULL;
-
- assert(err);
-
- if (transaction->state != REF_TRANSACTION_OPEN)
- BUG("commit called for transaction that is not open");
-
- /* Fail if a refname appears more than once in the transaction: */
- for (i = 0; i < transaction->nr; i++)
- string_list_append(&affected_refnames,
- transaction->updates[i]->refname);
- string_list_sort(&affected_refnames);
- if (ref_update_reject_duplicates(&affected_refnames, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- /*
- * It's really undefined to call this function in an active
- * repository or when there are existing references: we are
- * only locking and changing packed-refs, so (1) any
- * simultaneous processes might try to change a reference at
- * the same time we do, and (2) any existing loose versions of
- * the references that we are setting would have precedence
- * over our values. But some remote helpers create the remote
- * "HEAD" and "master" branches before calling this function,
- * so here we really only check that none of the references
- * that we are creating already exists.
- */
- if (refs_for_each_rawref(&refs->base, ref_present,
- &affected_refnames))
- BUG("initial ref transaction called with existing refs");
-
- packed_transaction = ref_store_transaction_begin(refs->packed_ref_store,
- transaction->flags, err);
- if (!packed_transaction) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- for (i = 0; i < transaction->nr; i++) {
- struct ref_update *update = transaction->updates[i];
-
- if ((update->flags & REF_HAVE_OLD) &&
- !is_null_oid(&update->old_oid))
- BUG("initial ref transaction with old_sha1 set");
- if (refs_verify_refname_available(&refs->base, update->refname,
- &affected_refnames, NULL,
- err)) {
- ret = TRANSACTION_NAME_CONFLICT;
- goto cleanup;
- }
-
- /*
- * Add a reference creation for this reference to the
- * packed-refs transaction:
- */
- ref_transaction_add_update(packed_transaction, update->refname,
- update->flags & ~REF_HAVE_OLD,
- &update->new_oid, &update->old_oid,
- NULL, NULL, NULL);
- }
-
- if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- goto cleanup;
- }
-
- if (initial_ref_transaction_commit(packed_transaction, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
- }
-
- packed_refs_unlock(refs->packed_ref_store);
-cleanup:
- if (packed_transaction)
- ref_transaction_free(packed_transaction);
- transaction->state = REF_TRANSACTION_CLOSED;
- string_list_clear(&affected_refnames, 0);
- return ret;
-}
-
struct expire_reflog_cb {
reflog_expiry_should_prune_fn *should_prune_fn;
void *policy_cb;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 03/10] refs: introduce "initial" transaction flag
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 01/10] refs: allow passing flags when setting up a transaction Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 02/10] refs/files: move logic to commit initial transaction Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
` (7 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
There are two different ways to commit a transaction:
- `ref_transaction_commit()` can be used to commit a regular
transaction and is what almost every caller wants.
- `initial_ref_transaction_commit()` can be used when it is known that
the ref store that the transaction is committed for is empty and
when there are no concurrent processes. This is used when cloning a
new repository.
Implementing this via two separate functions has a couple of downsides.
First, every reference backend needs to implement a separate callback
even in the case where they don't special-case the initial transaction.
Second, backends are basically forced to reimplement the whole logic for
how to commit the transaction like the "files" backend does, even though
backends may wish to only tweak certain behaviour of a "normal" commit.
Third, it is awkward that callers must never prepare the transaction as
this is somewhat different than how a transaction typically works.
Refactor the code such that we instead mark initial transactions via a
separate flag when starting the transaction. This addresses all of the
mentioned painpoints, where the most important part is that it will
allow backends to have way more leeway in how exactly they want to
handle the initial transaction.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
builtin/clone.c | 4 ++--
refs.c | 10 +---------
refs.h | 37 ++++++++++++++++++-------------------
refs/debug.c | 13 -------------
refs/files-backend.c | 16 ++++++++--------
refs/packed-backend.c | 8 --------
refs/refs-internal.h | 1 -
refs/reftable-backend.c | 8 --------
8 files changed, 29 insertions(+), 68 deletions(-)
diff --git a/builtin/clone.c b/builtin/clone.c
index d963cc6eb5181e1af5bb29c07c3ee2fa24ad04ca..8427ccec17e77b23ee1a7f526b80a52d79a71873 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -574,7 +574,7 @@ static void write_remote_refs(const struct ref *local_refs)
struct strbuf err = STRBUF_INIT;
t = ref_store_transaction_begin(get_main_ref_store(the_repository),
- 0, &err);
+ REF_TRANSACTION_FLAG_INITIAL, &err);
if (!t)
die("%s", err.buf);
@@ -586,7 +586,7 @@ static void write_remote_refs(const struct ref *local_refs)
die("%s", err.buf);
}
- if (initial_ref_transaction_commit(t, &err))
+ if (ref_transaction_commit(t, &err))
die("%s", err.buf);
strbuf_release(&err);
diff --git a/refs.c b/refs.c
index 9effeb01eb45728514eab0ca92404ea8cf2158d9..8b9dfc6173fd144fe9a172cb81cf33057ae31368 100644
--- a/refs.c
+++ b/refs.c
@@ -2315,7 +2315,7 @@ int ref_transaction_commit(struct ref_transaction *transaction,
}
ret = refs->be->transaction_finish(refs, transaction, err);
- if (!ret)
+ if (!ret && !(transaction->flags & REF_TRANSACTION_FLAG_INITIAL))
run_transaction_hook(transaction, "committed");
return ret;
}
@@ -2486,14 +2486,6 @@ int refs_reflog_expire(struct ref_store *refs,
cleanup_fn, policy_cb_data);
}
-int initial_ref_transaction_commit(struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct ref_store *refs = transaction->ref_store;
-
- return refs->be->initial_transaction_commit(refs, transaction, err);
-}
-
void ref_transaction_for_each_queued_update(struct ref_transaction *transaction,
ref_transaction_for_each_queued_update_fn cb,
void *cb_data)
diff --git a/refs.h b/refs.h
index 9821f3e80d900b31c3dede489c2f415d968233d7..024a370554e013d66febee635e4c0415ba061fe6 100644
--- a/refs.h
+++ b/refs.h
@@ -214,11 +214,9 @@ char *repo_default_branch_name(struct repository *r, int quiet);
*
* Or
*
- * - Call `initial_ref_transaction_commit()` if the ref database is
- * known to be empty and have no other writers (e.g. during
- * clone). This is likely to be much faster than
- * `ref_transaction_commit()`. `ref_transaction_prepare()` should
- * *not* be called before `initial_ref_transaction_commit()`.
+ * - Call `ref_transaction_begin()` with REF_TRANSACTION_FLAG_INITIAL if the
+ * ref database is known to be empty and have no other writers (e.g. during
+ * clone). This is likely to be much faster than without the flag.
*
* - Then finally, call `ref_transaction_free()` to free the
* `ref_transaction` data structure.
@@ -579,6 +577,21 @@ enum action_on_err {
UPDATE_REFS_QUIET_ON_ERR
};
+enum ref_transaction_flag {
+ /*
+ * The ref transaction is part of the initial creation of the ref store
+ * and can thus assume that the ref store is completely empty. This
+ * allows the backend to perform the transaction more efficiently by
+ * skipping certain checks.
+ *
+ * It is a bug to set this flag when there might be other processes
+ * accessing the repository or if there are existing references that
+ * might conflict with the ones being created. All old_oid values must
+ * either be absent or null_oid.
+ */
+ REF_TRANSACTION_FLAG_INITIAL = (1 << 0),
+};
+
/*
* Begin a reference transaction. The reference transaction must
* be freed by calling ref_transaction_free().
@@ -798,20 +811,6 @@ int ref_transaction_commit(struct ref_transaction *transaction,
int ref_transaction_abort(struct ref_transaction *transaction,
struct strbuf *err);
-/*
- * Like ref_transaction_commit(), but optimized for creating
- * references when originally initializing a repository (e.g., by "git
- * clone"). It writes the new references directly to packed-refs
- * without locking the individual references.
- *
- * It is a bug to call this function when there might be other
- * processes accessing the repository or if there are existing
- * references that might conflict with the ones being created. All
- * old_oid values must either be absent or null_oid.
- */
-int initial_ref_transaction_commit(struct ref_transaction *transaction,
- struct strbuf *err);
-
/*
* Execute the given callback function for each of the reference updates which
* have been queued in the given transaction. `old_oid` and `new_oid` may be
diff --git a/refs/debug.c b/refs/debug.c
index 45e2e784a0f8c49f492eaa9d371aa44f8c7916c3..cbac62c8a4f924580e80f106f87639daf77cef5c 100644
--- a/refs/debug.c
+++ b/refs/debug.c
@@ -118,18 +118,6 @@ static int debug_transaction_abort(struct ref_store *refs,
return res;
}
-static int debug_initial_transaction_commit(struct ref_store *refs,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- struct debug_ref_store *drefs = (struct debug_ref_store *)refs;
- int res;
- transaction->ref_store = drefs->refs;
- res = drefs->refs->be->initial_transaction_commit(drefs->refs,
- transaction, err);
- return res;
-}
-
static int debug_pack_refs(struct ref_store *ref_store, struct pack_refs_opts *opts)
{
struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store;
@@ -443,7 +431,6 @@ struct ref_storage_be refs_be_debug = {
.transaction_prepare = debug_transaction_prepare,
.transaction_finish = debug_transaction_finish,
.transaction_abort = debug_transaction_abort,
- .initial_transaction_commit = debug_initial_transaction_commit,
.pack_refs = debug_pack_refs,
.rename_ref = debug_rename_ref,
diff --git a/refs/files-backend.c b/refs/files-backend.c
index f37c805a34167b3749fbe724788180975abdae90..3ed18475a72aa4798d15b2912c37b4caabd47aac 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2781,6 +2781,8 @@ static int files_transaction_prepare(struct ref_store *ref_store,
assert(err);
+ if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL)
+ goto cleanup;
if (!transaction->nr)
goto cleanup;
@@ -2985,13 +2987,10 @@ static int ref_present(const char *refname, const char *referent UNUSED,
return string_list_has_string(affected_refnames, refname);
}
-static int files_initial_transaction_commit(struct ref_store *ref_store,
+static int files_transaction_finish_initial(struct files_ref_store *refs,
struct ref_transaction *transaction,
struct strbuf *err)
{
- struct files_ref_store *refs =
- files_downcast(ref_store, REF_STORE_WRITE,
- "initial_ref_transaction_commit");
size_t i;
int ret = 0;
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
@@ -2999,8 +2998,8 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
assert(err);
- if (transaction->state != REF_TRANSACTION_OPEN)
- BUG("commit called for transaction that is not open");
+ if (transaction->state != REF_TRANSACTION_PREPARED)
+ BUG("commit called for transaction that is not prepared");
/* Fail if a refname appears more than once in the transaction: */
for (i = 0; i < transaction->nr; i++)
@@ -3063,7 +3062,7 @@ static int files_initial_transaction_commit(struct ref_store *ref_store,
goto cleanup;
}
- if (initial_ref_transaction_commit(packed_transaction, err)) {
+ if (ref_transaction_commit(packed_transaction, err)) {
ret = TRANSACTION_GENERIC_ERROR;
}
@@ -3091,6 +3090,8 @@ static int files_transaction_finish(struct ref_store *ref_store,
assert(err);
+ if (transaction->flags & REF_TRANSACTION_FLAG_INITIAL)
+ return files_transaction_finish_initial(refs, transaction, err);
if (!transaction->nr) {
transaction->state = REF_TRANSACTION_CLOSED;
return 0;
@@ -3617,7 +3618,6 @@ struct ref_storage_be refs_be_files = {
.transaction_prepare = files_transaction_prepare,
.transaction_finish = files_transaction_finish,
.transaction_abort = files_transaction_abort,
- .initial_transaction_commit = files_initial_transaction_commit,
.pack_refs = files_pack_refs,
.rename_ref = files_rename_ref,
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 07c57fd541a5039d5fcb93d9bf78e1916f67b219..794471de60c78494f1f2b0e9de013422e3e7625d 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -1730,13 +1730,6 @@ static int packed_transaction_finish(struct ref_store *ref_store,
return ret;
}
-static int packed_initial_transaction_commit(struct ref_store *ref_store UNUSED,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- return ref_transaction_commit(transaction, err);
-}
-
static int packed_pack_refs(struct ref_store *ref_store UNUSED,
struct pack_refs_opts *pack_opts UNUSED)
{
@@ -1769,7 +1762,6 @@ struct ref_storage_be refs_be_packed = {
.transaction_prepare = packed_transaction_prepare,
.transaction_finish = packed_transaction_finish,
.transaction_abort = packed_transaction_abort,
- .initial_transaction_commit = packed_initial_transaction_commit,
.pack_refs = packed_pack_refs,
.rename_ref = NULL,
diff --git a/refs/refs-internal.h b/refs/refs-internal.h
index dbc6360c5a1d410c192e7eee1bffb1d423e1f9ee..33335d54c9162755c70174093017439c0018f36d 100644
--- a/refs/refs-internal.h
+++ b/refs/refs-internal.h
@@ -666,7 +666,6 @@ struct ref_storage_be {
ref_transaction_prepare_fn *transaction_prepare;
ref_transaction_finish_fn *transaction_finish;
ref_transaction_abort_fn *transaction_abort;
- ref_transaction_commit_fn *initial_transaction_commit;
pack_refs_fn *pack_refs;
rename_ref_fn *rename_ref;
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 38eb14d591ec0c7c221b6b0f7483e547748e7f1c..8e914afc817f198bed3199630b430e179cabc740 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1490,13 +1490,6 @@ static int reftable_be_transaction_finish(struct ref_store *ref_store UNUSED,
return ret;
}
-static int reftable_be_initial_transaction_commit(struct ref_store *ref_store UNUSED,
- struct ref_transaction *transaction,
- struct strbuf *err)
-{
- return ref_transaction_commit(transaction, err);
-}
-
static int reftable_be_pack_refs(struct ref_store *ref_store,
struct pack_refs_opts *opts)
{
@@ -2490,7 +2483,6 @@ struct ref_storage_be refs_be_reftable = {
.transaction_prepare = reftable_be_transaction_prepare,
.transaction_finish = reftable_be_transaction_finish,
.transaction_abort = reftable_be_transaction_abort,
- .initial_transaction_commit = reftable_be_initial_transaction_commit,
.pack_refs = reftable_be_pack_refs,
.rename_ref = reftable_be_rename_ref,
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 04/10] refs/files: support symbolic and root refs in initial transaction
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (2 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 03/10] refs: introduce "initial" transaction flag Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
` (6 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
The "files" backend has implemented special logic when committing
the first transactions in an otherwise empty ref store: instead of
writing all refs as separate loose files, it instead knows to write them
all into a "packed-refs" file directly. This is significantly more
efficient than having to write each of the refs as separate "loose" ref.
The only user of this optimization is git-clone(1), which only uses this
mechanism to write regular refs. Consequently, the implementation does
not know how to handle both symbolic and root refs. While fine in the
context of git-clone(1), this keeps us from using the mechanism in more
cases.
Adapt the logic to also support symbolic and root refs by using a second
transaction that we use for all of the refs that need to be written as
loose refs.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs/files-backend.c | 44 ++++++++++++++++++++++++++++++++++----------
1 file changed, 34 insertions(+), 10 deletions(-)
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 3ed18475a72aa4798d15b2912c37b4caabd47aac..116d4259697b20583cb2db34ed47025e8781cd42 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -2995,6 +2995,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
int ret = 0;
struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
struct ref_transaction *packed_transaction = NULL;
+ struct ref_transaction *loose_transaction = NULL;
assert(err);
@@ -3040,6 +3041,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
if ((update->flags & REF_HAVE_OLD) &&
!is_null_oid(&update->old_oid))
BUG("initial ref transaction with old_sha1 set");
+
if (refs_verify_refname_available(&refs->base, update->refname,
&affected_refnames, NULL,
err)) {
@@ -3048,26 +3050,48 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
}
/*
- * Add a reference creation for this reference to the
- * packed-refs transaction:
+ * packed-refs don't support symbolic refs and root refs, so we
+ * have to queue these references via the loose transaction.
*/
- ref_transaction_add_update(packed_transaction, update->refname,
- update->flags & ~REF_HAVE_OLD,
- &update->new_oid, &update->old_oid,
- NULL, NULL, NULL);
+ if (update->new_target || is_root_ref(update->refname)) {
+ if (!loose_transaction) {
+ loose_transaction = ref_store_transaction_begin(&refs->base, 0, err);
+ if (!loose_transaction) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
+ }
+
+ ref_transaction_add_update(loose_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ update->new_target ? NULL : &update->new_oid, NULL,
+ update->new_target, NULL, NULL);
+ } else {
+ ref_transaction_add_update(packed_transaction, update->refname,
+ update->flags & ~REF_HAVE_OLD,
+ &update->new_oid, &update->old_oid,
+ NULL, NULL, NULL);
+ }
}
- if (packed_refs_lock(refs->packed_ref_store, 0, err)) {
+ if (packed_refs_lock(refs->packed_ref_store, 0, err) ||
+ ref_transaction_commit(packed_transaction, err)) {
ret = TRANSACTION_GENERIC_ERROR;
goto cleanup;
}
+ packed_refs_unlock(refs->packed_ref_store);
- if (ref_transaction_commit(packed_transaction, err)) {
- ret = TRANSACTION_GENERIC_ERROR;
+ if (loose_transaction) {
+ if (ref_transaction_prepare(loose_transaction, err) ||
+ ref_transaction_commit(loose_transaction, err)) {
+ ret = TRANSACTION_GENERIC_ERROR;
+ goto cleanup;
+ }
}
- packed_refs_unlock(refs->packed_ref_store);
cleanup:
+ if (loose_transaction)
+ ref_transaction_free(loose_transaction);
if (packed_transaction)
ref_transaction_free(packed_transaction);
transaction->state = REF_TRANSACTION_CLOSED;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 05/10] refs: use "initial" transaction semantics to migrate refs
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (3 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 04/10] refs/files: support symbolic and root refs in initial transaction Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
` (5 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Until now, we couldn't use "initial" transaction semantics to migrate
refs because the "files" backend only supported writing regular refs via
the initial transaction because it simply mapped the transaction to a
"packed-refs" transaction. But with the preceding commit, the "files"
backend has learned to also write symbolic and root refs in the initial
transaction by creating a second transaction for all refs that need to
be written as loose refs.
Adapt the code to migrate refs to commit the transaction as an initial
transaction. This results in a signiticant speedup when migrating many
refs:
Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 3.247 s ± 0.034 s [User: 0.485 s, System: 2.722 s]
Range (min … max): 3.216 s … 3.309 s 10 runs
Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
Time (mean ± σ): 453.6 ms ± 1.9 ms [User: 214.6 ms, System: 230.5 ms]
Range (min … max): 451.5 ms … 456.4 ms 10 runs
Summary
migrate reftable:files (refcount = 100000, revision = HEAD) ran
7.16 ± 0.08 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
As the reftable backend doesn't (yet) special-case initial transactions
there is no comparable speedup for that backend.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 10 ++--------
t/t1460-refs-migrate.sh | 2 +-
2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/refs.c b/refs.c
index 8b9dfc6173fd144fe9a172cb81cf33057ae31368..0f10c565bbb4e0d91210c52a3221a224e4264d28 100644
--- a/refs.c
+++ b/refs.c
@@ -2827,7 +2827,8 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- transaction = ref_store_transaction_begin(new_refs, 0, errbuf);
+ transaction = ref_store_transaction_begin(new_refs, REF_TRANSACTION_FLAG_INITIAL,
+ errbuf);
if (!transaction)
goto done;
@@ -2852,13 +2853,6 @@ int repo_migrate_ref_storage_format(struct repository *repo,
if (ret < 0)
goto done;
- /*
- * TODO: we might want to migrate to `initial_ref_transaction_commit()`
- * here, which is more efficient for the files backend because it would
- * write new refs into the packed-refs file directly. At this point,
- * the files backend doesn't handle pseudo-refs and symrefs correctly
- * though, so this requires some more work.
- */
ret = ref_transaction_commit(transaction, errbuf);
if (ret < 0)
goto done;
diff --git a/t/t1460-refs-migrate.sh b/t/t1460-refs-migrate.sh
index f7c0783d30ccd61b0fee67c115193b42bb0e2c77..b90b38a87f7bb905afeeceb4f9a3bfc8b772e16a 100755
--- a/t/t1460-refs-migrate.sh
+++ b/t/t1460-refs-migrate.sh
@@ -237,7 +237,7 @@ test_expect_success 'migrating from reftable format deletes backend files' '
test_path_is_missing repo/.git/reftable &&
echo "ref: refs/heads/main" >expect &&
test_cmp expect repo/.git/HEAD &&
- test_path_is_file repo/.git/refs/heads/main
+ test_path_is_file repo/.git/packed-refs
'
test_done
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 06/10] refs: skip collision checks in initial transactions
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (4 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 05/10] refs: use "initial" transaction semantics to migrate refs Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` Patrick Steinhardt
` (4 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Reference transactions use `refs_verify_refname_available()` to check
for colliding references. This check consists of two parts:
- Checks for whether multiple ref updates in the same transaction
conflict with each other.
- Checks for whether existing refs conflict with any refs part of the
transaction.
While we generally cannot avoid the first check, the second check is
superfluous in cases where the transaction is an initial one in an
otherwise empty ref store. The check results in multiple ref reads as
well as the creation of a ref iterator for every ref we're checking,
which adds up quite fast when performing the check for many refs.
Introduce a new flag that allows us to skip this check and wire it up so
that the backends pass it when running an initial transaction. This
leads to significant speedups when migrating ref storage backends. From
"files" to "reftable":
Benchmark 1: migrate files:reftable (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 472.4 ms ± 6.7 ms [User: 175.9 ms, System: 285.2 ms]
Range (min … max): 463.5 ms … 483.2 ms 10 runs
Benchmark 2: migrate files:reftable (refcount = 100000, revision = HEAD)
Time (mean ± σ): 86.1 ms ± 1.9 ms [User: 67.9 ms, System: 16.0 ms]
Range (min … max): 82.9 ms … 90.9 ms 29 runs
Summary
migrate files:reftable (refcount = 100000, revision = HEAD) ran
5.48 ± 0.15 times faster than migrate files:reftable (refcount = 100000, revision = HEAD~)
And from "reftable" to "files":
Benchmark 1: migrate reftable:files (refcount = 100000, revision = HEAD~)
Time (mean ± σ): 452.7 ms ± 3.4 ms [User: 209.9 ms, System: 235.4 ms]
Range (min … max): 445.9 ms … 457.5 ms 10 runs
Benchmark 2: migrate reftable:files (refcount = 100000, revision = HEAD)
Time (mean ± σ): 95.2 ms ± 2.2 ms [User: 73.6 ms, System: 20.6 ms]
Range (min … max): 91.7 ms … 100.8 ms 28 runs
Summary
migrate reftable:files (refcount = 100000, revision = HEAD) ran
4.76 ± 0.11 times faster than migrate reftable:files (refcount = 100000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 37 +++++++++++++++++++++----------------
refs.h | 5 ++++-
refs/files-backend.c | 13 ++++++-------
refs/reftable-backend.c | 6 ++++--
t/helper/test-ref-store.c | 2 +-
5 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/refs.c b/refs.c
index 0f10c565bbb4e0d91210c52a3221a224e4264d28..65d42dd603c6cfcb7966c795166b054343034608 100644
--- a/refs.c
+++ b/refs.c
@@ -2324,6 +2324,7 @@ int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
+ unsigned int initial_transaction,
struct strbuf *err)
{
const char *slash;
@@ -2332,8 +2333,6 @@ int refs_verify_refname_available(struct ref_store *refs,
struct strbuf referent = STRBUF_INIT;
struct object_id oid;
unsigned int type;
- struct ref_iterator *iter;
- int ok;
int ret = -1;
/*
@@ -2363,7 +2362,8 @@ int refs_verify_refname_available(struct ref_store *refs,
if (skip && string_list_has_string(skip, dirname.buf))
continue;
- if (!refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
+ if (!initial_transaction &&
+ !refs_read_raw_ref(refs, dirname.buf, &oid, &referent,
&type, &ignore_errno)) {
strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
dirname.buf, refname);
@@ -2388,21 +2388,26 @@ int refs_verify_refname_available(struct ref_store *refs,
strbuf_addstr(&dirname, refname + dirname.len);
strbuf_addch(&dirname, '/');
- iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
- DO_FOR_EACH_INCLUDE_BROKEN);
- while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
- if (skip &&
- string_list_has_string(skip, iter->refname))
- continue;
+ if (!initial_transaction) {
+ struct ref_iterator *iter;
+ int ok;
- strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
- iter->refname, refname);
- ref_iterator_abort(iter);
- goto cleanup;
- }
+ iter = refs_ref_iterator_begin(refs, dirname.buf, NULL, 0,
+ DO_FOR_EACH_INCLUDE_BROKEN);
+ while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
+ if (skip &&
+ string_list_has_string(skip, iter->refname))
+ continue;
- if (ok != ITER_DONE)
- BUG("error while iterating over references");
+ strbuf_addf(err, _("'%s' exists; cannot create '%s'"),
+ iter->refname, refname);
+ ref_iterator_abort(iter);
+ goto cleanup;
+ }
+
+ if (ok != ITER_DONE)
+ BUG("error while iterating over references");
+ }
extra_refname = find_descendant_ref(dirname.buf, extras, skip);
if (extra_refname)
diff --git a/refs.h b/refs.h
index 024a370554e013d66febee635e4c0415ba061fe6..95baf194ba9493f4e8f1f70924f0eb713e5bbd49 100644
--- a/refs.h
+++ b/refs.h
@@ -101,13 +101,16 @@ int refs_read_symbolic_ref(struct ref_store *ref_store, const char *refname,
* both "foo" and with "foo/bar/baz" but not with "foo/bar" or
* "foo/barbados".
*
+ * If `initial_transaction` is truish, then all collision checks with
+ * preexisting refs are skipped.
+ *
* extras and skip must be sorted.
*/
-
int refs_verify_refname_available(struct ref_store *refs,
const char *refname,
const struct string_list *extras,
const struct string_list *skip,
+ unsigned int initial_transaction,
struct strbuf *err);
int refs_ref_exists(struct ref_store *refs, const char *refname);
diff --git a/refs/files-backend.c b/refs/files-backend.c
index 116d4259697b20583cb2db34ed47025e8781cd42..d27806c02c272f8bddc789b509e3c3c7af4f75aa 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -706,7 +706,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
* reason to expect this error to be transitory.
*/
if (refs_verify_refname_available(&refs->base, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
if (mustexist) {
/*
* To the user the relevant error is
@@ -813,7 +813,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
REMOVE_DIR_EMPTY_ONLY)) {
if (refs_verify_refname_available(
&refs->base, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
/*
* The error message set by
* verify_refname_available() is OK.
@@ -850,7 +850,7 @@ static int lock_raw_ref(struct files_ref_store *refs,
*/
if (refs_verify_refname_available(
refs->packed_ref_store, refname,
- extras, NULL, err)) {
+ extras, NULL, 0, err)) {
ret = TRANSACTION_NAME_CONFLICT;
goto error_return;
}
@@ -1159,7 +1159,7 @@ static struct ref_lock *lock_ref_oid_basic(struct files_ref_store *refs,
*/
if (is_null_oid(&lock->old_oid) &&
refs_verify_refname_available(refs->packed_ref_store, refname,
- NULL, NULL, err))
+ NULL, NULL, 0, err))
goto error_return;
lock->ref_name = xstrdup(refname);
@@ -1538,7 +1538,7 @@ static int refs_rename_ref_available(struct ref_store *refs,
string_list_insert(&skip, old_refname);
ok = !refs_verify_refname_available(refs, new_refname,
- NULL, &skip, &err);
+ NULL, &skip, 0, &err);
if (!ok)
error("%s", err.buf);
@@ -3043,8 +3043,7 @@ static int files_transaction_finish_initial(struct files_ref_store *refs,
BUG("initial ref transaction with old_sha1 set");
if (refs_verify_refname_available(&refs->base, update->refname,
- &affected_refnames, NULL,
- err)) {
+ &affected_refnames, NULL, 1, err)) {
ret = TRANSACTION_NAME_CONFLICT;
goto cleanup;
}
diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 8e914afc817f198bed3199630b430e179cabc740..bbc779ab410b41f00759a3a41a76dd708f115c90 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -1097,7 +1097,9 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
* at a later point.
*/
ret = refs_verify_refname_available(ref_store, u->refname,
- &affected_refnames, NULL, err);
+ &affected_refnames, NULL,
+ transaction->flags & REF_TRANSACTION_FLAG_INITIAL,
+ err);
if (ret < 0)
goto done;
@@ -1584,7 +1586,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
if (arg->delete_old)
string_list_insert(&skip, arg->oldname);
ret = refs_verify_refname_available(&arg->refs->base, arg->newname,
- NULL, &skip, &errbuf);
+ NULL, &skip, 0, &errbuf);
if (ret < 0) {
error("%s", errbuf.buf);
goto done;
diff --git a/t/helper/test-ref-store.c b/t/helper/test-ref-store.c
index 65346dee551ccd781a88786f0c8465f60b286cde..240f6fc29d7f1bb20658deee467bcb46ac3407b2 100644
--- a/t/helper/test-ref-store.c
+++ b/t/helper/test-ref-store.c
@@ -199,7 +199,7 @@ static int cmd_verify_ref(struct ref_store *refs, const char **argv)
struct strbuf err = STRBUF_INIT;
int ret;
- ret = refs_verify_refname_available(refs, refname, NULL, NULL, &err);
+ ret = refs_verify_refname_available(refs, refname, NULL, NULL, 0, &err);
if (err.len)
puts(err.buf);
return ret;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG`
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (5 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 06/10] refs: skip collision checks in initial transactions Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
` (3 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
When the `REF_SKIP_CREATE_REFLOG` flag is set we skip the creation of
the reflog entry, but we still normalize the reflog message when we
queue the update. This is a waste of resources as the normalized message
will never get used in the first place.
Fix this issue by skipping the normalization in case the flag is set.
This leads to a surprisingly large speedup when migrating from the
"files" to the "reftable" backend:
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 878.5 ms ± 14.9 ms [User: 726.5 ms, System: 139.2 ms]
Range (min … max): 858.4 ms … 941.3 ms 50 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 831.1 ms ± 10.5 ms [User: 694.1 ms, System: 126.3 ms]
Range (min … max): 812.4 ms … 851.4 ms 50 runs
Summary
migrate files:reftable (refcount = 1000000, revision = HEAD) ran
1.06 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)
And an ever larger speedup when migrating the other way round:
Benchmark 1: migrate reftable:files (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 923.6 ms ± 11.6 ms [User: 705.5 ms, System: 208.1 ms]
Range (min … max): 905.3 ms … 946.5 ms 50 runs
Benchmark 2: migrate reftable:files (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 818.5 ms ± 9.0 ms [User: 627.6 ms, System: 180.6 ms]
Range (min … max): 802.2 ms … 842.9 ms 50 runs
Summary
migrate reftable:files (refcount = 1000000, revision = HEAD) ran
1.13 ± 0.02 times faster than migrate reftable:files (refcount = 1000000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
refs.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/refs.c b/refs.c
index 65d42dd603c6cfcb7966c795166b054343034608..ee870817466b7d6d6a6619ce0baffe17f3d5a39f 100644
--- a/refs.c
+++ b/refs.c
@@ -1188,8 +1188,9 @@ struct ref_update *ref_transaction_add_update(
oidcpy(&update->new_oid, new_oid);
if ((flags & REF_HAVE_OLD) && old_oid)
oidcpy(&update->old_oid, old_oid);
+ if (!(flags & REF_SKIP_CREATE_REFLOG))
+ update->msg = normalize_reflog_message(msg);
- update->msg = normalize_reflog_message(msg);
return update;
}
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 08/10] reftable/writer: optimize allocations by using a scratch buffer
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (6 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 07/10] refs: don't normalize log messages with `REF_SKIP_CREATE_REFLOG` Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 09/10] reftable/block: rename `block_writer::buf` variable Patrick Steinhardt
` (2 subsequent siblings)
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Both `writer_add_record()` and `reftable_writer_add_ref()` get executed
for every single ref record we're adding to the reftable writer. And as
both functions use a local buffer to write data, the allocations we have
to do here add up during larger transactions.
Refactor the code to use a scratch buffer part of the `reftable_writer`
itself such that we can reuse it. This signifcantly reduces the number
of allocations during large transactions, e.g. when migrating refs from
the "files" backend to the "reftable" backend. Before this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 5,032,171 allocs, 5,032,122 frees, 418,792,092 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated
This also translate into a small speedup:
Benchmark 1: migrate files:reftable (refcount = 1000000, revision = HEAD~)
Time (mean ± σ): 827.2 ms ± 16.5 ms [User: 689.4 ms, System: 124.9 ms]
Range (min … max): 809.0 ms … 924.7 ms 50 runs
Benchmark 2: migrate files:reftable (refcount = 1000000, revision = HEAD)
Time (mean ± σ): 813.6 ms ± 11.6 ms [User: 679.0 ms, System: 123.4 ms]
Range (min … max): 786.7 ms … 833.5 ms 50 runs
Summary
migrate files:reftable (refcount = 1000000, revision = HEAD) ran
1.02 ± 0.02 times faster than migrate files:reftable (refcount = 1000000, revision = HEAD~)
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/writer.c | 23 +++++++++++------------
reftable/writer.h | 2 ++
2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/reftable/writer.c b/reftable/writer.c
index fd136794d5a27b33b5017f36fbd6b095ab8dac5b..be0fae6cb229411258d40b8865c2fdee88fd5bcd 100644
--- a/reftable/writer.c
+++ b/reftable/writer.c
@@ -148,6 +148,7 @@ int reftable_writer_new(struct reftable_writer **out,
reftable_buf_init(&wp->block_writer_data.last_key);
reftable_buf_init(&wp->last_key);
+ reftable_buf_init(&wp->scratch);
REFTABLE_CALLOC_ARRAY(wp->block, opts.block_size);
if (!wp->block) {
reftable_free(wp);
@@ -180,6 +181,7 @@ static void writer_release(struct reftable_writer *w)
w->block_writer = NULL;
writer_clear_index(w);
reftable_buf_release(&w->last_key);
+ reftable_buf_release(&w->scratch);
}
}
@@ -249,20 +251,19 @@ static int writer_index_hash(struct reftable_writer *w, struct reftable_buf *has
static int writer_add_record(struct reftable_writer *w,
struct reftable_record *rec)
{
- struct reftable_buf key = REFTABLE_BUF_INIT;
int err;
- err = reftable_record_key(rec, &key);
+ err = reftable_record_key(rec, &w->scratch);
if (err < 0)
goto done;
- if (reftable_buf_cmp(&w->last_key, &key) >= 0) {
+ if (reftable_buf_cmp(&w->last_key, &w->scratch) >= 0) {
err = REFTABLE_API_ERROR;
goto done;
}
reftable_buf_reset(&w->last_key);
- err = reftable_buf_add(&w->last_key, key.buf, key.len);
+ err = reftable_buf_add(&w->last_key, w->scratch.buf, w->scratch.len);
if (err < 0)
goto done;
@@ -312,7 +313,6 @@ static int writer_add_record(struct reftable_writer *w,
}
done:
- reftable_buf_release(&key);
return err;
}
@@ -325,7 +325,6 @@ int reftable_writer_add_ref(struct reftable_writer *w,
.ref = *ref
},
};
- struct reftable_buf buf = REFTABLE_BUF_INIT;
int err;
if (!ref->refname ||
@@ -340,24 +339,25 @@ int reftable_writer_add_ref(struct reftable_writer *w,
goto out;
if (!w->opts.skip_index_objects && reftable_ref_record_val1(ref)) {
- err = reftable_buf_add(&buf, (char *)reftable_ref_record_val1(ref),
+ reftable_buf_reset(&w->scratch);
+ err = reftable_buf_add(&w->scratch, (char *)reftable_ref_record_val1(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
+ err = writer_index_hash(w, &w->scratch);
if (err < 0)
goto out;
}
if (!w->opts.skip_index_objects && reftable_ref_record_val2(ref)) {
- reftable_buf_reset(&buf);
- err = reftable_buf_add(&buf, reftable_ref_record_val2(ref),
+ reftable_buf_reset(&w->scratch);
+ err = reftable_buf_add(&w->scratch, reftable_ref_record_val2(ref),
hash_size(w->opts.hash_id));
if (err < 0)
goto out;
- err = writer_index_hash(w, &buf);
+ err = writer_index_hash(w, &w->scratch);
if (err < 0)
goto out;
}
@@ -365,7 +365,6 @@ int reftable_writer_add_ref(struct reftable_writer *w,
err = 0;
out:
- reftable_buf_release(&buf);
return err;
}
diff --git a/reftable/writer.h b/reftable/writer.h
index e8a6fbb78543e6e56920a2999601db0db9fe4d97..1f4788a430c52c5387b5e97f639e84544d0b9ba2 100644
--- a/reftable/writer.h
+++ b/reftable/writer.h
@@ -20,6 +20,8 @@ struct reftable_writer {
void *write_arg;
int pending_padding;
struct reftable_buf last_key;
+ /* Scratch buffer used to avoid allocations. */
+ struct reftable_buf scratch;
/* offset of next block to write. */
uint64_t next;
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 09/10] reftable/block: rename `block_writer::buf` variable
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (7 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 08/10] reftable/writer: optimize allocations by using a scratch buffer Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:27 ` [PATCH v3 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
2024-11-25 6:29 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
Adapt the name of the `block_writer::buf` variable to instead be called
`block`. This aligns it with the existing `block_len` variable, which
tracks the length of this buffer, and is generally a bit more tied to
the actual context where this variable gets used.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block.c | 20 ++++++++++----------
reftable/block.h | 8 ++++----
2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/reftable/block.c b/reftable/block.c
index f5b432566a6b9f171a1f1374b6c892ab0696d744..3fa36c002a0c1852790780e74a6e055161f857d9 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -70,14 +70,14 @@ static int block_writer_register_restart(struct block_writer *w, int n,
return 0;
}
-int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
uint32_t block_size, uint32_t header_off, int hash_size)
{
- bw->buf = buf;
+ bw->block = block;
bw->hash_size = hash_size;
bw->block_size = block_size;
bw->header_off = header_off;
- bw->buf[header_off] = typ;
+ bw->block[header_off] = typ;
bw->next = header_off + 4;
bw->restart_interval = 16;
bw->entries = 0;
@@ -95,7 +95,7 @@ int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
uint8_t block_writer_type(struct block_writer *bw)
{
- return bw->buf[bw->header_off];
+ return bw->block[bw->header_off];
}
/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
@@ -107,7 +107,7 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
struct reftable_buf last =
w->entries % w->restart_interval == 0 ? empty : w->last_key;
struct string_view out = {
- .buf = w->buf + w->next,
+ .buf = w->block + w->next,
.len = w->block_size - w->next,
};
@@ -153,13 +153,13 @@ int block_writer_finish(struct block_writer *w)
{
int i;
for (i = 0; i < w->restart_len; i++) {
- put_be24(w->buf + w->next, w->restarts[i]);
+ put_be24(w->block + w->next, w->restarts[i]);
w->next += 3;
}
- put_be16(w->buf + w->next, w->restart_len);
+ put_be16(w->block + w->next, w->restart_len);
w->next += 2;
- put_be24(w->buf + 1 + w->header_off, w->next);
+ put_be24(w->block + 1 + w->header_off, w->next);
/*
* Log records are stored zlib-compressed. Note that the compression
@@ -188,7 +188,7 @@ int block_writer_finish(struct block_writer *w)
w->zstream->next_out = w->compressed;
w->zstream->avail_out = compressed_len;
- w->zstream->next_in = w->buf + block_header_skip;
+ w->zstream->next_in = w->block + block_header_skip;
w->zstream->avail_in = src_len;
/*
@@ -206,7 +206,7 @@ int block_writer_finish(struct block_writer *w)
* adjust the `next` pointer to point right after the
* compressed data.
*/
- memcpy(w->buf + block_header_skip, w->compressed,
+ memcpy(w->block + block_header_skip, w->compressed,
w->zstream->total_out);
w->next = w->zstream->total_out + block_header_skip;
}
diff --git a/reftable/block.h b/reftable/block.h
index 9a3effa513420039ee3f2834339c5082f64339d0..b3f837d612a8f0fbe98430b04e2dddaa975a15ab 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -22,7 +22,7 @@ struct block_writer {
unsigned char *compressed;
size_t compressed_cap;
- uint8_t *buf;
+ uint8_t *block;
uint32_t block_size;
/* Offset of the global header. Nonzero in the first block only. */
@@ -43,9 +43,9 @@ struct block_writer {
};
/*
- * initializes the blockwriter to write `typ` entries, using `buf` as temporary
- * storage. `buf` is not owned by the block_writer. */
-int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
+ * initializes the blockwriter to write `typ` entries, using `block` as temporary
+ * storage. `block` is not owned by the block_writer. */
+int block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *block,
uint32_t block_size, uint32_t header_off, int hash_size);
/* returns the block type (eg. 'r' for ref records. */
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* [PATCH v3 10/10] reftable/block: optimize allocations by using scratch buffer
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (8 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 09/10] reftable/block: rename `block_writer::buf` variable Patrick Steinhardt
@ 2024-11-25 6:27 ` Patrick Steinhardt
2024-11-25 6:29 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:27 UTC (permalink / raw)
To: git; +Cc: karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
The block writer needs to compute the key for every record that one adds
to the writer. The buffer for this key is stored on the stack and thus
reallocated on every call to `block_writer_add()`, which is inefficient.
Refactor the code so that we store the buffer in the `block_writer`
struct itself so that we can reuse it. This reduces the number of
allocations when writing many refs, e.g. when migrating one million refs
from the "files" backend to the "reftable backend. Before this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 3,025,864 allocs, 3,025,815 frees, 372,746,291 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 80,048 bytes in 49 blocks
total heap usage: 2,013,250 allocs, 2,013,201 frees, 347,543,583 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
reftable/block.c | 13 +++++--------
reftable/block.h | 2 ++
2 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/reftable/block.c b/reftable/block.c
index 3fa36c002a0c1852790780e74a6e055161f857d9..01980784854cc454938bd2278b94047ff62c20d4 100644
--- a/reftable/block.c
+++ b/reftable/block.c
@@ -110,24 +110,21 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
.buf = w->block + w->next,
.len = w->block_size - w->next,
};
-
struct string_view start = out;
-
int is_restart = 0;
- struct reftable_buf key = REFTABLE_BUF_INIT;
int n = 0;
int err;
- err = reftable_record_key(rec, &key);
+ err = reftable_record_key(rec, &w->scratch);
if (err < 0)
goto done;
- if (!key.len) {
+ if (!w->scratch.len) {
err = REFTABLE_API_ERROR;
goto done;
}
- n = reftable_encode_key(&is_restart, out, last, key,
+ n = reftable_encode_key(&is_restart, out, last, w->scratch,
reftable_record_val_type(rec));
if (n < 0) {
err = -1;
@@ -143,9 +140,8 @@ int block_writer_add(struct block_writer *w, struct reftable_record *rec)
string_view_consume(&out, n);
err = block_writer_register_restart(w, start.len - out.len, is_restart,
- &key);
+ &w->scratch);
done:
- reftable_buf_release(&key);
return err;
}
@@ -569,6 +565,7 @@ void block_writer_release(struct block_writer *bw)
REFTABLE_FREE_AND_NULL(bw->zstream);
REFTABLE_FREE_AND_NULL(bw->restarts);
REFTABLE_FREE_AND_NULL(bw->compressed);
+ reftable_buf_release(&bw->scratch);
reftable_buf_release(&bw->last_key);
/* the block is not owned. */
}
diff --git a/reftable/block.h b/reftable/block.h
index b3f837d612a8f0fbe98430b04e2dddaa975a15ab..0431e8591f41dedfb96eef304ea63ef2e9e5f5dd 100644
--- a/reftable/block.h
+++ b/reftable/block.h
@@ -39,6 +39,8 @@ struct block_writer {
uint32_t restart_cap;
struct reftable_buf last_key;
+ /* Scratch buffer used to avoid allocations. */
+ struct reftable_buf scratch;
int entries;
};
--
2.47.0.274.g962d0b743d.dirty
^ permalink raw reply related [flat|nested] 54+ messages in thread
* Re: [PATCH v3 00/10] refs: optimize ref format migrations
2024-11-25 6:27 ` [PATCH v3 00/10] refs: optimize ref format migrations Patrick Steinhardt
` (9 preceding siblings ...)
2024-11-25 6:27 ` [PATCH v3 10/10] reftable/block: optimize allocations by using scratch buffer Patrick Steinhardt
@ 2024-11-25 6:29 ` Patrick Steinhardt
10 siblings, 0 replies; 54+ messages in thread
From: Patrick Steinhardt @ 2024-11-25 6:29 UTC (permalink / raw)
To: Patrick Steinhardt
Cc: git, karthik nayak, Junio C Hamano, Kristoffer Haugsbakk,
Christian Couder
On Mon, Nov 25, 2024 at 07:27:05AM +0100, Patrick Steinhardt wrote:
> Changes in v3:
>
> - Mention that we store the ref transaction flag for access by the
> backend in the first commit message.
> - Fix a missing word in another commit message.
> - Use `unsigned int` to pass `initial_transaction` flag.
> - Rename the scratch buffers and provide a comment for why they exist.
> - Link to v2: https://lore.kernel.org/r/20241120-pks-refs-optimize-migrations-v2-0-a233374b7452@pks.im
Oh, I just saw that the merge to 'next' has crossed with v3 of this
series. Not much of an issue, these were all cosmetic changes anyway. I
can send these in a follow-up series.
Patrick
^ permalink raw reply [flat|nested] 54+ messages in thread