* [PATCH 0/8] refs/reftable: reuse iterators when reading refs
@ 2024-11-04 15:11 Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
                   ` (11 more replies)
  0 siblings, 12 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

Hi,

this patch series refactors the reftable backend to reuse reftable
iterators when performing random reads of references. This removes the
overhead of recreating the iterator on every read and thus leads to
better performance and less allocation churn. It also opens the door to
further optimizations, e.g. by making re-seeking of iterators cheaper
in the future.

Overall this leads to a 7% speedup when creating many refs in a
transaction, a workload that performs many random reads. The change
also benefits other use cases.

Thanks!

Patrick

Patrick Steinhardt (8):
  refs/reftable: encapsulate reftable stack
  refs/reftable: handle reloading stacks in the reftable backend
  refs/reftable: read references via `struct reftable_backend`
  refs/reftable: refactor reading symbolic refs to use reftable backend
  refs/reftable: refactor reflog expiry to use reftable backend
  reftable/stack: add mechanism to notify callers on reload
  reftable/merged: drain priority queue on reseek
  refs/reftable: reuse iterators when reading refs

 refs/reftable-backend.c          | 356 ++++++++++++++++++-------------
 reftable/merged.c                |   2 +
 reftable/reftable-stack.h        |   3 +
 reftable/reftable-writer.h       |   9 +
 reftable/stack.c                 |   9 +
 t/unit-tests/t-reftable-merged.c |  73 +++++++
 6 files changed, 308 insertions(+), 144 deletions(-)

-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 1/8] refs/reftable: encapsulate reftable stack
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-05 11:03   ` karthik nayak
  2024-11-04 15:11 ` [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
                   ` (10 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

The reftable ref store needs to keep track of multiple stacks, one for
the main worktree and one for each linked worktree. This is done by
storing pointers to `struct reftable_stack`, which we then access
directly.

Wrap the stack in a new `struct reftable_backend`. This will allow us to
attach more data to each respective stack in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 129 +++++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 56 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 38eb14d591..43cba53cb1 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -32,6 +32,23 @@
  */
 #define REF_UPDATE_VIA_HEAD (1 << 8)
 
+struct reftable_backend {
+	struct reftable_stack *stack;
+};
+
+static int reftable_backend_init(struct reftable_backend *be,
+				 const char *path,
+				 const struct reftable_write_options *opts)
+{
+	return reftable_new_stack(&be->stack, path, opts);
+}
+
+static void reftable_backend_release(struct reftable_backend *be)
+{
+	reftable_stack_destroy(be->stack);
+	be->stack = NULL;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -39,17 +56,17 @@ struct reftable_ref_store {
 	 * The main stack refers to the common dir and thus contains common
 	 * refs as well as refs of the main repository.
 	 */
-	struct reftable_stack *main_stack;
+	struct reftable_backend main_backend;
 	/*
 	 * The worktree stack refers to the gitdir in case the refdb is opened
 	 * via a worktree. It thus contains the per-worktree refs.
 	 */
-	struct reftable_stack *worktree_stack;
+	struct reftable_backend worktree_backend;
 	/*
 	 * Map of worktree stacks by their respective worktree names. The map
 	 * is populated lazily when we try to resolve `worktrees/$worktree` refs.
 	 */
-	struct strmap worktree_stacks;
+	struct strmap worktree_backends;
 	struct reftable_write_options write_options;
 
 	unsigned int store_flags;
@@ -95,21 +112,21 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_stack *stack_for(struct reftable_ref_store *store,
-					const char *refname,
-					const char **rewritten_ref)
+static struct reftable_backend *backend_for(struct reftable_ref_store *store,
+					    const char *refname,
+					    const char **rewritten_ref)
 {
 	const char *wtname;
 	int wtname_len;
 
 	if (!refname)
-		return store->main_stack;
+		return &store->main_backend;
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_stack *stack;
+		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -123,37 +140,39 @@ static struct reftable_stack *stack_for(struct reftable_ref_store *store,
 		/*
 		 * There is an edge case here: when the worktree references the
 		 * current worktree, then we set up the stack once via
-		 * `worktree_stacks` and once via `worktree_stack`. This is
+		 * `worktree_backends` and once via `worktree_backend`. This is
 		 * wasteful, but in the reading case it shouldn't matter. And
 		 * in the writing case we would notice that the stack is locked
 		 * already and error out when trying to write a reference via
 		 * both stacks.
 		 */
-		stack = strmap_get(&store->worktree_stacks, wtname_buf.buf);
-		if (!stack) {
+		be = strmap_get(&store->worktree_backends, wtname_buf.buf);
+		if (!be) {
 			strbuf_addf(&wt_dir, "%s/worktrees/%s/reftable",
 				    store->base.repo->commondir, wtname_buf.buf);
 
-			store->err = reftable_new_stack(&stack, wt_dir.buf,
-							&store->write_options);
+			CALLOC_ARRAY(be, 1);
+			store->err = reftable_backend_init(be, wt_dir.buf,
+							   &store->write_options);
 			assert(store->err != REFTABLE_API_ERROR);
-			strmap_put(&store->worktree_stacks, wtname_buf.buf, stack);
+
+			strmap_put(&store->worktree_backends, wtname_buf.buf, be);
 		}
 
 		strbuf_release(&wt_dir);
-		return stack;
+		return be;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
 		 * If there is no worktree stack then we're currently in the
 		 * main worktree. We thus return the main stack in that case.
 		 */
-		if (!store->worktree_stack)
-			return store->main_stack;
-		return store->worktree_stack;
+		if (!store->worktree_backend.stack)
+			return &store->main_backend;
+		return &store->worktree_backend;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return store->main_stack;
+		return &store->main_backend;
 	default:
 		BUG("unhandled worktree reference type");
 	}
@@ -285,7 +304,7 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 	umask(mask);
 
 	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
-	strmap_init(&refs->worktree_stacks);
+	strmap_init(&refs->worktree_backends);
 	refs->store_flags = store_flags;
 	refs->log_all_ref_updates = repo_settings_get_log_all_ref_updates(repo);
 
@@ -320,8 +339,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_realpath(&path, gitdir, 0);
 	}
 	strbuf_addstr(&path, "/reftable");
-	refs->err = reftable_new_stack(&refs->main_stack, path.buf,
-				       &refs->write_options);
+	refs->err = reftable_backend_init(&refs->main_backend, path.buf,
+					  &refs->write_options);
 	if (refs->err)
 		goto done;
 
@@ -337,8 +356,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_reset(&path);
 		strbuf_addf(&path, "%s/reftable", gitdir);
 
-		refs->err = reftable_new_stack(&refs->worktree_stack, path.buf,
-					       &refs->write_options);
+		refs->err = reftable_backend_init(&refs->worktree_backend, path.buf,
+						  &refs->write_options);
 		if (refs->err)
 			goto done;
 	}
@@ -357,19 +376,17 @@ static void reftable_be_release(struct ref_store *ref_store)
 	struct strmap_entry *entry;
 	struct hashmap_iter iter;
 
-	if (refs->main_stack) {
-		reftable_stack_destroy(refs->main_stack);
-		refs->main_stack = NULL;
-	}
+	if (refs->main_backend.stack)
+		reftable_backend_release(&refs->main_backend);
+	if (refs->worktree_backend.stack)
+		reftable_backend_release(&refs->worktree_backend);
 
-	if (refs->worktree_stack) {
-		reftable_stack_destroy(refs->worktree_stack);
-		refs->worktree_stack = NULL;
+	strmap_for_each_entry(&refs->worktree_backends, &iter, entry) {
+		struct reftable_backend *be = entry->value;
+		reftable_backend_release(be);
+		free(be);
 	}
-
-	strmap_for_each_entry(&refs->worktree_stacks, &iter, entry)
-		reftable_stack_destroy(entry->value);
-	strmap_clear(&refs->worktree_stacks, 0);
+	strmap_clear(&refs->worktree_backends, 0);
 }
 
 static int reftable_be_create_on_disk(struct ref_store *ref_store,
@@ -764,7 +781,7 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 		required_flags |= REF_STORE_ODB;
 	refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin");
 
-	main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix,
+	main_iter = ref_iterator_for_stack(refs, refs->main_backend.stack, prefix,
 					   exclude_patterns, flags);
 
 	/*
@@ -772,14 +789,14 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 	 * right now. If we aren't, then we return the common reftable
 	 * iterator, only.
 	 */
-	 if (!refs->worktree_stack)
+	 if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
 	/*
 	 * Otherwise we merge both the common and the per-worktree refs into a
 	 * single iterator.
 	 */
-	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix,
+	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_backend.stack, prefix,
 					       exclude_patterns, flags);
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -794,7 +811,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	int ret;
 
 	if (refs->err < 0)
@@ -821,7 +838,7 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
 	int ret;
 
@@ -881,7 +898,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = stack_for(refs, update->refname, NULL);
+	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
 	size_t i;
 	int ret;
@@ -1014,7 +1031,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, stack_for(refs, "HEAD", NULL), "HEAD",
+	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1026,7 +1043,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = stack_for(refs, u->refname, &rewritten_ref);
+		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1508,9 +1525,9 @@ static int reftable_be_pack_refs(struct ref_store *ref_store,
 	if (refs->err)
 		return refs->err;
 
-	stack = refs->worktree_stack;
+	stack = refs->worktree_backend.stack;
 	if (!stack)
-		stack = refs->main_stack;
+		stack = refs->main_backend.stack;
 
 	if (opts->flags & PACK_REFS_AUTO)
 		ret = reftable_stack_auto_compact(stack);
@@ -1765,7 +1782,7 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1797,7 +1814,7 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1935,11 +1952,11 @@ static struct ref_iterator *reftable_be_reflog_iterator_begin(struct ref_store *
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_iterator_begin");
 	struct reftable_reflog_iterator *main_iter, *worktree_iter;
 
-	main_iter = reflog_iterator_for_stack(refs, refs->main_stack);
-	if (!refs->worktree_stack)
+	main_iter = reflog_iterator_for_stack(refs, refs->main_backend.stack);
+	if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
-	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_stack);
+	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_backend.stack);
 
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -1978,7 +1995,7 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2018,7 +2035,7 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
 	size_t logs_alloc = 0, logs_nr = 0, i;
@@ -2067,7 +2084,7 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2152,7 +2169,7 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -2226,7 +2243,7 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_delete_arg arg = {
 		.stack = stack,
 		.refname = refname,
@@ -2335,7 +2352,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};
-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-05 11:14   ` karthik nayak
  2024-11-04 15:11 ` [PATCH 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
                   ` (9 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

When accessing a stack we almost always have to reload the stack before
reading data from it. This is mostly because Git does not have a
notification mechanism for when underlying data has been changed, and
thus we are forced to opportunistically reload the stack every single
time to account for any changes that may have happened concurrently.

Handle the reload internally in `backend_for()`. For one, this forces
callsites to think about whether or not they need to reload the stack.
Second, it makes the logic to access stacks more self-contained by
letting each `struct reftable_backend` manage itself.
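
To illustrate the new calling convention (this mirrors the converted
callsites in the diff below):

    struct reftable_backend *be;
    int ret;

    /*
     * Request the backend responsible for `refname` and reload its
     * stack up front. Passing 0 instead of 1 skips the reload for
     * callers that know the stack is up to date already.
     */
    ret = backend_for(&be, refs, refname, &refname, 1);
    if (ret)
        return ret;

    /* `be->stack` now reflects the current on-disk state. */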

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 150 +++++++++++++++++++++++++---------------
 1 file changed, 93 insertions(+), 57 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 43cba53cb1..93f3602faa 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -112,21 +112,25 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_backend *backend_for(struct reftable_ref_store *store,
-					    const char *refname,
-					    const char **rewritten_ref)
+static int backend_for(struct reftable_backend **out,
+		       struct reftable_ref_store *store,
+		       const char *refname,
+		       const char **rewritten_ref,
+		       int reload)
 {
+	struct reftable_backend *be;
 	const char *wtname;
 	int wtname_len;
 
-	if (!refname)
-		return &store->main_backend;
+	if (!refname) {
+		be = &store->main_backend;
+		goto out;
+	}
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -160,7 +164,7 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		}
 
 		strbuf_release(&wt_dir);
-		return be;
+		goto out;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
@@ -168,14 +172,27 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		 * main worktree. We thus return the main stack in that case.
 		 */
 		if (!store->worktree_backend.stack)
-			return &store->main_backend;
-		return &store->worktree_backend;
+			be = &store->main_backend;
+		else
+			be = &store->worktree_backend;
+		goto out;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return &store->main_backend;
+		be = &store->main_backend;
+		goto out;
 	default:
 		BUG("unhandled worktree reference type");
 	}
+
+out:
+	if (reload) {
+		int ret = reftable_stack_reload(be->stack);
+		if (ret)
+			return ret;
+	}
+	*out = be;
+
+	return 0;
 }
 
 static int should_write_log(struct reftable_ref_store *refs, const char *refname)
@@ -811,17 +828,17 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, stack, refname, oid, referent, type);
+	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -838,15 +855,15 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
+	struct reftable_backend *be;
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
 	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
 		strbuf_addstr(referent, ref.value.symref);
 	else
@@ -863,7 +880,7 @@ struct reftable_transaction_update {
 
 struct write_transaction_table_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	struct reftable_addition *addition;
 	struct reftable_transaction_update *updates;
 	size_t updates_nr;
@@ -898,27 +915,31 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
+	struct reftable_backend *be;
 	size_t i;
 	int ret;
 
+	ret = backend_for(&be, refs, update->refname, NULL, 0);
+	if (ret)
+		return ret;
+
 	/*
 	 * Search for a preexisting stack update. If there is one then we add
 	 * the update to it, otherwise we set up a new stack update.
 	 */
 	for (i = 0; !arg && i < tx_data->args_nr; i++)
-		if (tx_data->args[i].stack == stack)
+		if (tx_data->args[i].be == be)
 			arg = &tx_data->args[i];
 
 	if (!arg) {
 		struct reftable_addition *addition;
 
-		ret = reftable_stack_reload(stack);
+		ret = backend_for(&be, refs, update->refname, NULL, 1);
 		if (ret)
 			return ret;
 
-		ret = reftable_stack_new_addition(&addition, stack,
+		ret = reftable_stack_new_addition(&addition, be->stack,
 						  REFTABLE_STACK_NEW_ADDITION_RELOAD);
 		if (ret) {
 			if (ret == REFTABLE_LOCK_ERROR)
@@ -930,7 +951,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 			   tx_data->args_alloc);
 		arg = &tx_data->args[tx_data->args_nr++];
 		arg->refs = refs;
-		arg->stack = stack;
+		arg->be = be;
 		arg->addition = addition;
 		arg->updates = NULL;
 		arg->updates_nr = 0;
@@ -985,6 +1006,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	struct strbuf referent = STRBUF_INIT, head_referent = STRBUF_INIT;
 	struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
 	struct reftable_transaction_data *tx_data = NULL;
+	struct reftable_backend *be;
 	struct object_id head_oid;
 	unsigned int head_type = 0;
 	size_t i;
@@ -1031,7 +1053,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
+	ret = backend_for(&be, refs, "HEAD", NULL, 0);
+	if (ret)
+		goto done;
+
+	ret = read_ref_without_reload(refs, be->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1040,10 +1066,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	for (i = 0; i < transaction->nr; i++) {
 		struct ref_update *u = transaction->updates[i];
 		struct object_id current_oid = {0};
-		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
+		ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0);
+		if (ret)
+			goto done;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1099,7 +1126,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, stack, rewritten_ref,
+		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
 					      &current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
@@ -1301,7 +1328,7 @@ static int transaction_update_cmp(const void *a, const void *b)
 static int write_transaction_table(struct reftable_writer *writer, void *cb_data)
 {
 	struct write_transaction_table_arg *arg = cb_data;
-	uint64_t ts = reftable_stack_next_update_index(arg->stack);
+	uint64_t ts = reftable_stack_next_update_index(arg->be->stack);
 	struct reftable_log_record *logs = NULL;
 	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -1337,7 +1364,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			struct reftable_log_record log = {0};
 			struct reftable_iterator it = {0};
 
-			ret = reftable_stack_init_log_iterator(arg->stack, &it);
+			ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 			if (ret < 0)
 				goto done;
 
@@ -1782,10 +1809,9 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1797,10 +1823,11 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1814,10 +1841,9 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1828,10 +1854,11 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1995,15 +2022,19 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2035,16 +2066,20 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	size_t logs_alloc = 0, logs_nr = 0, i;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2084,20 +2119,20 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	ret = refs->err;
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2169,10 +2204,9 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
@@ -2181,11 +2215,12 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		goto done;
+	arg.stack = be->stack;
 
-	ret = reftable_stack_add(stack, &write_reflog_existence_table, &arg);
+	ret = reftable_stack_add(be->stack, &write_reflog_existence_table, &arg);
 
 done:
 	return ret;
@@ -2243,17 +2278,18 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_delete_arg arg = {
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
-	ret = reftable_stack_add(stack, &write_reflog_delete_table, &arg);
+	arg.stack = be->stack;
+
+	ret = reftable_stack_add(be->stack, &write_reflog_delete_table, &arg);
 
 	assert(ret != REFTABLE_API_ERROR);
 	return ret;
@@ -2352,13 +2388,13 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
+	struct reftable_backend *be;
 	struct object_id oid = {0};
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -2367,11 +2403,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2379,11 +2415,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_new_addition(&add, stack, 0);
+	ret = reftable_stack_new_addition(&add, be->stack, 0);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref_record);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
 	if (ret < 0)
 		goto done;
 	if (reftable_ref_record_val1(&ref_record))
@@ -2462,7 +2498,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	arg.refs = refs;
 	arg.records = rewritten;
 	arg.len = logs_nr;
-	arg.stack = stack,
+	arg.stack = be->stack,
 	arg.refname = refname,
 
 	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);
-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 3/8] refs/reftable: read references via `struct reftable_backend`
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-05 11:20   ` karthik nayak
  2024-11-04 15:11 ` [PATCH 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
                   ` (8 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

Refactor `read_ref_without_reload()` to accept a `struct
reftable_backend` as input instead of a `struct reftable_stack`. This
allows us to implement an additional caching layer when reading refs
where we can reuse reftable iterators.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c   | 97 +++++++++++++++++++--------------------
 reftable/reftable-stack.h |  3 ++
 reftable/stack.c          |  5 ++
 3 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 93f3602faa..99caa9d5e6 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -49,6 +49,37 @@ static void reftable_backend_release(struct reftable_backend *be)
 	be->stack = NULL;
 }
 
+static int reftable_backend_read_ref(struct reftable_backend *be,
+				     const char *refname,
+				     struct object_id *oid,
+				     struct strbuf *referent,
+				     unsigned int *type)
+{
+	struct reftable_ref_record ref = {0};
+	int ret;
+
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (ret)
+		goto done;
+
+	if (ref.value_type == REFTABLE_REF_SYMREF) {
+		strbuf_reset(referent);
+		strbuf_addstr(referent, ref.value.symref);
+		*type |= REF_ISSYMREF;
+	} else if (reftable_ref_record_val1(&ref)) {
+		oidread(oid, reftable_ref_record_val1(&ref),
+			&hash_algos[hash_algo_by_id(reftable_stack_hash_id(be->stack))]);
+	} else {
+		/* We got a tombstone, which should not happen. */
+		BUG("unhandled reference value type %d", ref.value_type);
+	}
+
+done:
+	assert(ret != REFTABLE_API_ERROR);
+	reftable_ref_record_release(&ref);
+	return ret;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -241,38 +272,6 @@ static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_ref_store *refs,
-				   struct reftable_stack *stack,
-				   const char *refname,
-				   struct object_id *oid,
-				   struct strbuf *referent,
-				   unsigned int *type)
-{
-	struct reftable_ref_record ref = {0};
-	int ret;
-
-	ret = reftable_stack_read_ref(stack, refname, &ref);
-	if (ret)
-		goto done;
-
-	if (ref.value_type == REFTABLE_REF_SYMREF) {
-		strbuf_reset(referent);
-		strbuf_addstr(referent, ref.value.symref);
-		*type |= REF_ISSYMREF;
-	} else if (reftable_ref_record_val1(&ref)) {
-		oidread(oid, reftable_ref_record_val1(&ref),
-			refs->base.repo->hash_algo);
-	} else {
-		/* We got a tombstone, which should not happen. */
-		BUG("unhandled reference value type %d", ref.value_type);
-	}
-
-done:
-	assert(ret != REFTABLE_API_ERROR);
-	reftable_ref_record_release(&ref);
-	return ret;
-}
-
 static int reftable_be_config(const char *var, const char *value,
 			      const struct config_context *ctx,
 			      void *_opts)
@@ -838,7 +837,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
+	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1057,8 +1056,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(refs, be->stack, "HEAD",
-				      &head_oid, &head_referent, &head_type);
+	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
+					&head_referent, &head_type);
 	if (ret < 0)
 		goto done;
 	ret = 0;
@@ -1126,8 +1125,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
-					      &current_oid, &referent, &u->type);
+		ret = reftable_backend_read_ref(be, rewritten_ref,
+						&current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
 		if (ret > 0 && !ref_update_expects_existing_old_ref(u)) {
@@ -1585,7 +1584,7 @@ struct write_create_symref_arg {
 
 struct write_copy_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	const char *oldname;
 	const char *newname;
 	const char *logmsg;
@@ -1610,7 +1609,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
 		BUG("failed splitting committer info");
 
-	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
+	if (reftable_stack_read_ref(arg->be->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
 	}
@@ -1649,7 +1648,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * the old branch and the creation of the new branch, and we cannot do
 	 * two changes to a reflog in a single update.
 	 */
-	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack);
+	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->be->stack);
 	if (arg->delete_old)
 		creation_ts++;
 	reftable_writer_set_limits(writer, deletion_ts, creation_ts);
@@ -1692,8 +1691,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
-		ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid,
-					      &head_referent, &head_type);
+		ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid,
+						&head_referent, &head_type);
 		if (ret < 0)
 			goto done;
 		append_head_reflog = (head_type & REF_ISSYMREF) && !strcmp(head_referent.buf, arg->oldname);
@@ -1736,7 +1735,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * copy over all log entries from the old reflog. Last but not least,
 	 * when renaming we also have to delete all the old reflog entries.
 	 */
-	ret = reftable_stack_init_log_iterator(arg->stack, &it);
+	ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -1809,7 +1808,6 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1823,11 +1821,10 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1841,7 +1838,6 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1854,11 +1850,10 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
index 54787f2ef5..6dfd22e3f5 100644
--- a/reftable/reftable-stack.h
+++ b/reftable/reftable-stack.h
@@ -149,4 +149,7 @@ struct reftable_compaction_stats {
 struct reftable_compaction_stats *
 reftable_stack_compaction_stats(struct reftable_stack *st);
 
+/* return the hash ID of the merged table. */
+uint32_t reftable_stack_hash_id(struct reftable_stack *st);
+
 #endif
diff --git a/reftable/stack.c b/reftable/stack.c
index c33979536e..530ba2d927 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1790,3 +1790,8 @@ int reftable_stack_clean(struct reftable_stack *st)
 	reftable_addition_destroy(add);
 	return err;
 }
+
+uint32_t reftable_stack_hash_id(struct reftable_stack *st)
+{
+	return reftable_merged_table_hash_id(st->merged);
+}
-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (2 preceding siblings ...)
  2024-11-04 15:11 ` [PATCH 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 5/8] refs/reftable: refactor reflog expiry " Patrick Steinhardt
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

Refactor the callback function that reads symbolic references in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 99caa9d5e6..3912431111 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -854,21 +854,18 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_ref_record ref = {0};
 	struct reftable_backend *be;
+	struct object_id oid;
+	unsigned int type = 0;
 	int ret;
 
 	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
-	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
-		strbuf_addstr(referent, ref.value.symref);
-	else
+	ret = reftable_backend_read_ref(be, refname, &oid, referent, &type);
+	if (type != REF_ISSYMREF)
 		ret = -1;
-
-	reftable_ref_record_release(&ref);
 	return ret;
 }
 
-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 5/8] refs/reftable: refactor reflog expiry to use reftable backend
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (3 preceding siblings ...)
  2024-11-04 15:11 ` [PATCH 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 6/8] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

Refactor the callback function that expires reflog entries in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 3912431111..98a070f5a7 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2382,14 +2382,15 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
-	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
 	struct reftable_backend *be;
 	struct object_id oid = {0};
+	struct strbuf referent = STRBUF_INIT;
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	unsigned int type = 0;
 	int ret;
 
 	if (refs->err < 0)
@@ -2411,12 +2412,9 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
+	ret = reftable_backend_read_ref(be, refname, &oid, &referent, &type);
 	if (ret < 0)
 		goto done;
-	if (reftable_ref_record_val1(&ref_record))
-		oidread(&oid, reftable_ref_record_val1(&ref_record),
-			ref_store->repo->hash_algo);
 	prepare_fn(refname, &oid, policy_cb_data);
 
 	while (1) {
@@ -2483,8 +2481,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		}
 	}
 
-	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash &&
-	    reftable_ref_record_val1(&ref_record))
+	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash && !is_null_oid(&oid))
 		oidread(&arg.update_oid, last_hash, ref_store->repo->hash_algo);
 
 	arg.refs = refs;
@@ -2509,11 +2506,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		cleanup_fn(policy_cb_data);
 	assert(ret != REFTABLE_API_ERROR);
 
-	reftable_ref_record_release(&ref_record);
 	reftable_iterator_destroy(&it);
 	reftable_addition_destroy(add);
 	for (i = 0; i < logs_nr; i++)
 		reftable_log_record_release(&logs[i]);
+	strbuf_release(&referent);
 	free(logs);
 	free(rewritten);
 	return ret;
-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 6/8] reftable/stack: add mechanism to notify callers on reload
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (4 preceding siblings ...)
  2024-11-04 15:11 ` [PATCH 5/8] refs/reftable: refactor reflog expiry " Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-04 15:11 ` [PATCH 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

Reftable stacks are reloaded in two cases:

  - When calling `reftable_stack_reload()`, if the stat-cache tells us
    that the stack has been modified.

  - When committing a reftable addition.

While callers can figure out the second case, they have no way to tell
whether `reftable_stack_reload()` led to an actual reload of the
on-disk data. They are thus forced to assume that the data is reloaded
every time.

Improve the situation by introducing a new `on_reload()` callback to the
reftable options. If provided, the function will be invoked every time
the stack has indeed been reloaded. This allows callers to invalidate
data that depends on the current stack data.
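
For illustration, a caller might hook into the new callback like this
(`struct my_cache` and `my_cache_clear()` are made-up names, not part
of this series):

    static void invalidate_cache(void *payload)
    {
        struct my_cache *cache = payload;
        /* Drop anything derived from the now-stale stack. */
        my_cache_clear(cache);
    }

    struct my_cache cache = { 0 };
    struct reftable_write_options opts = {
        .on_reload = invalidate_cache,
        .on_reload_payload = &cache,
    };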

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reftable-writer.h | 9 +++++++++
 reftable/stack.c           | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h
index e4fc953788..d027f7070b 100644
--- a/reftable/reftable-writer.h
+++ b/reftable/reftable-writer.h
@@ -62,6 +62,15 @@ struct reftable_write_options {
 	 * negative value will cause us to block indefinitely.
 	 */
 	long lock_timeout_ms;
+
+	/*
+	 * Callback function to execute whenever the stack is being reloaded.
+	 * This can be used e.g. to discard cached information that relies on
+	 * the old stack's data. The payload data will be passed as argument to
+	 * the callback.
+	 */
+	void (*on_reload)(void *payload);
+	void *on_reload_payload;
 };
 
 /* reftable_block_stats holds statistics for a single block type */
diff --git a/reftable/stack.c b/reftable/stack.c
index 530ba2d927..f943fc5b85 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -539,6 +539,10 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
 		close(fd);
 	free_names(names);
 	free_names(names_after);
+
+	if (st->opts.on_reload)
+		st->opts.on_reload(st->opts.on_reload_payload);
+
 	return err;
 }
 
-- 
2.47.0.229.g8f8d6eee53.dirty



* [PATCH 7/8] reftable/merged: drain priority queue on reseek
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (5 preceding siblings ...)
  2024-11-04 15:11 ` [PATCH 6/8] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-05  3:16   ` Junio C Hamano
  2024-11-04 15:11 ` [PATCH 8/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (4 subsequent siblings)
  11 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

In 5bf96e0c39 (reftable/generic: move seeking of records into the
iterator, 2024-05-13) we have refactored the reftable codebase such that
iterators can be initialized once and then re-seeked multiple times.
This feature is used by 1869525066 (refs/reftable: wire up support for
exclude patterns, 2024-09-16) in order to skip records based on exclude
patterns provided by the caller.

The logic to re-seek the merged iterator is insufficient though because
we don't drain the priority queue on a re-seek. This means that the
queue may contain stale entries, and reading the next record from the
queue may thus return the wrong entry. While this is an obvious bug, it
is harmless in the context of the above exclude patterns:

  - If the queue contained stale entries that match the pattern then the
    caller would already know to filter out such refs. This is because
    our codebase is prepared to handle backends that don't have a way to
    efficiently implement exclude patterns.

  - If the queue contained stale entries that don't match the pattern
    we'd eventually filter out any duplicates. This is because the
    reftable code discards items with the same ref name and sorts any
    remaining entries properly.

So things happen to work in this context regardless of the bug, and
there is no other use case yet where we re-seek iterators. We're about
to introduce a caching mechanism though, where the reftable backend
reuses iterators, and that will expose the bug.

Fix the issue by draining the priority queue when seeking and add a
testcase that surfaces the issue.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/merged.c                |  2 +
 t/unit-tests/t-reftable-merged.c | 73 ++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/reftable/merged.c b/reftable/merged.c
index 514d6facf4..84d6e933f4 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -66,6 +66,8 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want
 	int err;
 
 	mi->advance_index = -1;
+	while (!merged_iter_pqueue_is_empty(mi->pq))
+		merged_iter_pqueue_remove(&mi->pq);
 
 	for (size_t i = 0; i < mi->subiters_len; i++) {
 		err = iterator_seek(&mi->subiters[i].iter, want);
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 4c25ee5334..99ca33f973 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -273,6 +273,78 @@ static void t_merged_seek_multiple_times(void)
 	reftable_free(sources);
 }
 
+static void t_merged_seek_multiple_times_without_draining(void)
+{
+	struct reftable_ref_record r1[] = {
+		{
+			.refname = (char *) "a",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 1 },
+		},
+		{
+			.refname = (char *) "c",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 2 },
+		}
+	};
+	struct reftable_ref_record r2[] = {
+		{
+			.refname = (char *) "b",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 3 },
+		},
+		{
+			.refname = (char *) "d",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 4 },
+		},
+	};
+	struct reftable_ref_record *refs[] = {
+		r1, r2,
+	};
+	size_t sizes[] = {
+		ARRAY_SIZE(r1), ARRAY_SIZE(r2),
+	};
+	struct reftable_buf bufs[] = {
+		REFTABLE_BUF_INIT, REFTABLE_BUF_INIT,
+	};
+	struct reftable_block_source *sources = NULL;
+	struct reftable_reader **readers = NULL;
+	struct reftable_ref_record rec = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_merged_table *mt;
+	int err;
+
+	mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2);
+	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
+
+	err = reftable_iterator_seek_ref(&it, "b");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
+	check(err == 1);
+
+	err = reftable_iterator_seek_ref(&it, "a");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);
+	check(err == 1);
+
+	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
+		reftable_buf_release(&bufs[i]);
+	readers_destroy(readers, ARRAY_SIZE(refs));
+	reftable_ref_record_release(&rec);
+	reftable_iterator_destroy(&it);
+	reftable_merged_table_free(mt);
+	reftable_free(sources);
+}
+
 static struct reftable_merged_table *
 merged_table_from_log_records(struct reftable_log_record **logs,
 			      struct reftable_block_source **source,
@@ -467,6 +539,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
 	TEST(t_merged_logs(), "merged table with multiple log updates for same ref");
 	TEST(t_merged_refs(), "merged table with multiple updates to same ref");
 	TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times");
+	TEST(t_merged_seek_multiple_times_without_draining(), "merged table can seek multiple times without draining");
 	TEST(t_merged_single_record(), "ref occurring in only one record can be fetched");
 
 	return test_done();
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH 8/8] refs/reftable: reuse iterators when reading refs
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (6 preceding siblings ...)
  2024-11-04 15:11 ` [PATCH 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
@ 2024-11-04 15:11 ` Patrick Steinhardt
  2024-11-05  4:49 ` [PATCH 0/8] " Junio C Hamano
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-04 15:11 UTC (permalink / raw)
  To: git

When reading references the reftable backend has to:

  1. Create a new ref iterator.

  2. Seek the iterator to the record we're searching for.

  3. Read the record.

We cannot really avoid the last two steps, but re-creating the iterator
on every single read is expensive and wastes resources. We could not
avoid it in the past, though, because it was not possible to reuse
iterators. But starting with 5bf96e0c39
(reftable/generic: move seeking of records into the iterator,
2024-05-13) we have split up the iterator lifecycle such that creating
the iterator and seeking are two different concerns.
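
To make the new lifecycle concrete, here is a minimal sketch (error
handling elided; `stack` is a stand-in for whichever `struct
reftable_stack` is being read): the iterator is created once, and every
subsequent lookup merely re-seeks it.

    struct reftable_iterator it = { 0 };
    struct reftable_ref_record ref = { 0 };

    /* Create the iterator a single time... */
    if (!reftable_stack_init_ref_iterator(stack, &it)) {
            /* ...and re-seek the same iterator for every read. */
            if (!reftable_iterator_seek_ref(&it, "refs/heads/foo") &&
                !reftable_iterator_next_ref(&it, &ref))
                    ; /* use `ref` */
            if (!reftable_iterator_seek_ref(&it, "refs/heads/bar") &&
                !reftable_iterator_next_ref(&it, &ref))
                    ; /* use `ref` */
    }

    reftable_ref_record_release(&ref);
    reftable_iterator_destroy(&it);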

Refactor the code such that we cache iterators in the reftable backend.
This cache is invalidated whenever the respective stack is reloaded such
that we know to recreate the iterator in that case. This leads to a
sizeable speedup when creating many refs, which requires a lot of random
reference reads:

    Benchmark 1: update-ref: create many refs (refcount = 100000, revision = master)
      Time (mean ± σ):      1.793 s ±  0.010 s    [User: 0.954 s, System: 0.835 s]
      Range (min … max):    1.781 s …  1.811 s    10 runs

    Benchmark 2: update-ref: create many refs (refcount = 100000, revision = HEAD)
      Time (mean ± σ):      1.680 s ±  0.013 s    [User: 0.846 s, System: 0.831 s]
      Range (min … max):    1.664 s …  1.702 s    10 runs

    Summary
      update-ref: create many refs (refcount = 100000, revision = HEAD) ran
        1.07 ± 0.01 times faster than update-ref: create many refs (refcount = 100000, revision = master)

While 7% is not a huge win, you have to consider that the benchmark is
_writing_ data, so _reading_ references is only one part of what we do.
Flame graphs show that we spend around 40% of our time reading refs, so
the speedup when reading refs is approximately 2.5x that, or roughly
17%. I could not find better benchmarks where we perform a lot of
random ref reads.

You can also see a sizeable impact on memory usage when creating 100k
references. Before this change:

    HEAP SUMMARY:
        in use at exit: 19,112,538 bytes in 200,170 blocks
      total heap usage: 8,400,426 allocs, 8,200,256 frees, 454,367,048 bytes allocated

After this change:

    HEAP SUMMARY:
        in use at exit: 674,416 bytes in 169 blocks
      total heap usage: 7,929,872 allocs, 7,929,703 frees, 281,509,985 bytes allocated

As an additional factor, this refactoring opens up the possibility for
more performance optimizations in how we re-seek iterators. Any change
that allows us to optimize re-seeking by e.g. reusing data structures
would thus also directly speed up random reads.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 98a070f5a7..e0577d666f 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -34,19 +34,30 @@
 
 struct reftable_backend {
 	struct reftable_stack *stack;
+	struct reftable_iterator it;
 };
 
+static void reftable_backend_on_reload(void *payload)
+{
+	struct reftable_backend *be = payload;
+	reftable_iterator_destroy(&be->it);
+}
+
 static int reftable_backend_init(struct reftable_backend *be,
 				 const char *path,
-				 const struct reftable_write_options *opts)
+				 const struct reftable_write_options *_opts)
 {
-	return reftable_new_stack(&be->stack, path, opts);
+	struct reftable_write_options opts = *_opts;
+	opts.on_reload = reftable_backend_on_reload;
+	opts.on_reload_payload = be;
+	return reftable_new_stack(&be->stack, path, &opts);
 }
 
 static void reftable_backend_release(struct reftable_backend *be)
 {
 	reftable_stack_destroy(be->stack);
 	be->stack = NULL;
+	reftable_iterator_destroy(&be->it);
 }
 
 static int reftable_backend_read_ref(struct reftable_backend *be,
@@ -58,10 +69,25 @@ static int reftable_backend_read_ref(struct reftable_backend *be,
 	struct reftable_ref_record ref = {0};
 	int ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (!be->it.ops) {
+		ret = reftable_stack_init_ref_iterator(be->stack, &be->it);
+		if (ret)
+			goto done;
+	}
+
+	ret = reftable_iterator_seek_ref(&be->it, refname);
 	if (ret)
 		goto done;
 
+	ret = reftable_iterator_next_ref(&be->it, &ref);
+	if (ret)
+		goto done;
+
+	if (strcmp(ref.refname, refname)) {
+		ret = 1;
+		goto done;
+	}
+
 	if (ref.value_type == REFTABLE_REF_SYMREF) {
 		strbuf_reset(referent);
 		strbuf_addstr(referent, ref.value.symref);
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH 7/8] reftable/merged: drain priority queue on reseek
  2024-11-04 15:11 ` [PATCH 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
@ 2024-11-05  3:16   ` Junio C Hamano
  2024-11-05  3:23     ` Junio C Hamano
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2024-11-05  3:16 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git

Patrick Steinhardt <ps@pks.im> writes:

> diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
> index 4c25ee5334..99ca33f973 100644
> --- a/t/unit-tests/t-reftable-merged.c
> +++ b/t/unit-tests/t-reftable-merged.c
> @@ -273,6 +273,78 @@ static void t_merged_seek_multiple_times(void)
>  	reftable_free(sources);
>  }
>  
> +static void t_merged_seek_multiple_times_without_draining(void)
> +{
> ...
> +	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
> +	check(err == 1);
> +
> +	err = reftable_iterator_seek_ref(&it, "a");
> +	check(!err);
> +	err = reftable_iterator_next_ref(&it, &rec);
> +	check(!err);
> +	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);

Did you mean REFTABLE_HASH_SIZE_SHA1 instead?

diff --git i/t/unit-tests/t-reftable-merged.c w/t/unit-tests/t-reftable-merged.c
index 620803e0ed..a12bd0e1a3 100644
--- i/t/unit-tests/t-reftable-merged.c
+++ w/t/unit-tests/t-reftable-merged.c
@@ -326,14 +326,14 @@ static void t_merged_seek_multiple_times_without_draining(void)
 	check(!err);
 	err = reftable_iterator_next_ref(&it, &rec);
 	check(!err);
-	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
+	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
 	check(err == 1);
 
 	err = reftable_iterator_seek_ref(&it, "a");
 	check(!err);
 	err = reftable_iterator_next_ref(&it, &rec);
 	check(!err);
-	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);
+	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
 	check(err == 1);
 
 	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH 7/8] reftable/merged: drain priority queue on reseek
  2024-11-05  3:16   ` Junio C Hamano
@ 2024-11-05  3:23     ` Junio C Hamano
  2024-11-05  7:14       ` Patrick Steinhardt
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2024-11-05  3:23 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> Did you mean REFTABLE_HASH_SIZE_SHA1 instead?

Ah, that transition hasn't happened yet on 'master'.  I'll carry the
semantic conflict resolution in merge-fix hierarchy then.

> diff --git i/t/unit-tests/t-reftable-merged.c w/t/unit-tests/t-reftable-merged.c
> index 620803e0ed..a12bd0e1a3 100644
> --- i/t/unit-tests/t-reftable-merged.c
> +++ w/t/unit-tests/t-reftable-merged.c
> @@ -326,14 +326,14 @@ static void t_merged_seek_multiple_times_without_draining(void)
>  	check(!err);
>  	err = reftable_iterator_next_ref(&it, &rec);
>  	check(!err);
> -	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
> +	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
>  	check(err == 1);
>  
>  	err = reftable_iterator_seek_ref(&it, "a");
>  	check(!err);
>  	err = reftable_iterator_next_ref(&it, &rec);
>  	check(!err);
> -	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);
> +	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
>  	check(err == 1);
>  
>  	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)


^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 0/8] refs/reftable: reuse iterators when reading refs
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (7 preceding siblings ...)
  2024-11-04 15:11 ` [PATCH 8/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
@ 2024-11-05  4:49 ` Junio C Hamano
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 57+ messages in thread
From: Junio C Hamano @ 2024-11-05  4:49 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git

Patrick Steinhardt <ps@pks.im> writes:

> this patch series refactors the reftable backend to reuse reftable
> iterators when reading random references. This removes the overhead of
> having to recreate the iterator on every read and thus leads to better
> performance and less allocation churn. It also gives us the ability to
> further optimize reads by optimizing re-seeking iterators in the future.
>
> Overall this leads to a 7% speedup when creating many refs in a
> transaction, which performs many random reads. But this change also
> positively impacts other usecases.

When merged to 'seen', this seems to break a handful of tests.  I am
reasonably sure that it is an interaction with other topics, as these
tests pass in this topic itself without other topics merged.

Thanks.


Test Summary Report
-------------------
t0611-reftable-httpd.sh             (Wstat: 256 (exited 1) Tests: 1 Failed: 1)
  Failed test:  1
  Non-zero exit status: 1
t0613-reftable-write-options.sh     (Wstat: 256 (exited 1) Tests: 11 Failed: 9)
  Failed tests:  1-3, 5-8, 10-11
  Non-zero exit status: 1
t7424-submodule-mixed-ref-formats.sh (Wstat: 256 (exited 1) Tests: 7 Failed: 6)
  Failed tests:  2-7
  Non-zero exit status: 1
t1460-refs-migrate.sh               (Wstat: 256 (exited 1) Tests: 30 Failed: 16)
  Failed tests:  9-15, 21-27, 29-30
  Non-zero exit status: 1
t0610-reftable-basics.sh            (Wstat: 256 (exited 1) Tests: 89 Failed: 53)
  Failed tests:  8, 10, 12, 14, 16, 18, 20, 22, 24-27, 38-42
                44, 47-52, 55-56, 58, 62-71, 73, 75-89
  Non-zero exit status: 1

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 7/8] reftable/merged: drain priority queue on reseek
  2024-11-05  3:23     ` Junio C Hamano
@ 2024-11-05  7:14       ` Patrick Steinhardt
  0 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  7:14 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Mon, Nov 04, 2024 at 07:23:21PM -0800, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
> 
> > Did you mean REFTABLE_HASH_SIZE_SHA1 instead?
> 
> Ah, that transition hasn't happened yet on 'master'.  I'll carry the
> semantic conflict resolution in merge-fix hierarchy then.
> 
> > diff --git i/t/unit-tests/t-reftable-merged.c w/t/unit-tests/t-reftable-merged.c
> > index 620803e0ed..a12bd0e1a3 100644
> > --- i/t/unit-tests/t-reftable-merged.c
> > +++ w/t/unit-tests/t-reftable-merged.c
> > @@ -326,14 +326,14 @@ static void t_merged_seek_multiple_times_without_draining(void)
> >  	check(!err);
> >  	err = reftable_iterator_next_ref(&it, &rec);
> >  	check(!err);
> > -	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
> > +	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
> >  	check(err == 1);
> >  
> >  	err = reftable_iterator_seek_ref(&it, "a");
> >  	check(!err);
> >  	err = reftable_iterator_next_ref(&it, &rec);
> >  	check(!err);
> > -	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);
> > +	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
> >  	check(err == 1);
> >  
> >  	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)

This looks good to me. I guess I may end up resending this topic with a
new merge base in case the other failures are caused by that semantic
merge conflict, too.

Thanks!

Patrick

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 0/8] refs/reftable: reuse iterators when reading refs
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (8 preceding siblings ...)
  2024-11-05  4:49 ` [PATCH 0/8] " Junio C Hamano
@ 2024-11-05  9:11 ` Patrick Steinhardt
  2024-11-05  9:11   ` [PATCH v2 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
                     ` (7 more replies)
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
  11 siblings, 8 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:11 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Hi,

this is the second version of my patch series that refactors the
reftable backend to reuse iterators when reading random references. This
removes the overhead of having to recreate the iterator on every read
and thus leads to better performance and less allocation churn.

The only change compared to v1 is that I've rebased the series on top of
8f8d6eee53 (The seventh batch, 2024-11-01) with ps/reftable-detach at
3740325472 (reftable/system: provide thin wrapper for lockfile
subsystem, 2024-10-23) merged into it. This was done to fix textual and
semantic conflicts with that series.

Thanks!

Patrick

Patrick Steinhardt (8):
  refs/reftable: encapsulate reftable stack
  refs/reftable: handle reloading stacks in the reftable backend
  refs/reftable: read references via `struct reftable_backend`
  refs/reftable: refactor reading symbolic refs to use reftable backend
  refs/reftable: refactor reflog expiry to use reftable backend
  reftable/stack: add mechanism to notify callers on reload
  reftable/merged: drain priority queue on reseek
  refs/reftable: reuse iterators when reading refs

 refs/reftable-backend.c          | 369 +++++++++++++++++++------------
 reftable/merged.c                |   2 +
 reftable/reftable-stack.h        |   3 +
 reftable/reftable-writer.h       |   9 +
 reftable/stack.c                 |   9 +
 t/unit-tests/t-reftable-merged.c |  73 ++++++
 6 files changed, 321 insertions(+), 144 deletions(-)

Range-diff against v1:
1:  b599bcdac1 = 1:  ac01c06c41 refs/reftable: encapsulate reftable stack
2:  b81ce63589 = 2:  bab837e373 refs/reftable: handle reloading stacks in the reftable backend
3:  00fdf392a6 ! 3:  1b50655202 refs/reftable: read references via `struct reftable_backend`
    @@ refs/reftable-backend.c: static void reftable_backend_release(struct reftable_ba
     +		strbuf_addstr(referent, ref.value.symref);
     +		*type |= REF_ISSYMREF;
     +	} else if (reftable_ref_record_val1(&ref)) {
    ++		unsigned int hash_id;
    ++
    ++		switch (reftable_stack_hash_id(be->stack)) {
    ++		case REFTABLE_HASH_SHA1:
    ++			hash_id = GIT_HASH_SHA1;
    ++			break;
    ++		case REFTABLE_HASH_SHA256:
    ++			hash_id = GIT_HASH_SHA256;
    ++			break;
    ++		default:
    ++			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
    ++		}
    ++
     +		oidread(oid, reftable_ref_record_val1(&ref),
    -+			&hash_algos[hash_algo_by_id(reftable_stack_hash_id(be->stack))]);
    ++			&hash_algos[hash_id]);
     +	} else {
     +		/* We got a tombstone, which should not happen. */
     +		BUG("unhandled reference value type %d", ref.value_type);
    @@ reftable/reftable-stack.h: struct reftable_compaction_stats {
      struct reftable_compaction_stats *
      reftable_stack_compaction_stats(struct reftable_stack *st);
      
    -+/* return the hash ID of the merged table. */
    -+uint32_t reftable_stack_hash_id(struct reftable_stack *st);
    ++/* Return the hash of the stack. */
    ++enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
     +
      #endif
     
    @@ reftable/stack.c: int reftable_stack_clean(struct reftable_stack *st)
      	return err;
      }
     +
    -+uint32_t reftable_stack_hash_id(struct reftable_stack *st)
    ++enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
     +{
     +	return reftable_merged_table_hash_id(st->merged);
     +}
4:  142081cb0c = 4:  0906b04fc6 refs/reftable: refactor reading symbolic refs to use reftable backend
5:  44f4adce9a = 5:  355557ec95 refs/reftable: refactor reflog expiry to use reftable backend
6:  0a294b577f ! 6:  71ad6c80b0 reftable/stack: add mechanism to notify callers on reload
    @@ Commit message
     
      ## reftable/reftable-writer.h ##
     @@ reftable/reftable-writer.h: struct reftable_write_options {
    - 	 * negative value will cause us to block indefinitely.
    + 	 * fsync(3P) when unset.
      	 */
    - 	long lock_timeout_ms;
    + 	int (*fsync)(int fd);
     +
     +	/*
     +	 * Callback function to execute whenever the stack is being reloaded.
7:  45f397b563 ! 7:  93efd11886 reftable/merged: drain priority queue on reseek
    @@ t/unit-tests/t-reftable-merged.c: static void t_merged_seek_multiple_times(void)
     +	check(!err);
     +	err = reftable_iterator_next_ref(&it, &rec);
     +	check(!err);
    -+	err = reftable_ref_record_equal(&rec, &r2[0], GIT_SHA1_RAWSZ);
    ++	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
     +	check(err == 1);
     +
     +	err = reftable_iterator_seek_ref(&it, "a");
     +	check(!err);
     +	err = reftable_iterator_next_ref(&it, &rec);
     +	check(!err);
    -+	err = reftable_ref_record_equal(&rec, &r1[0], GIT_SHA1_RAWSZ);
    ++	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
     +	check(err == 1);
     +
     +	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
8:  feb4e6a36f = 8:  276c27e770 refs/reftable: reuse iterators when reading refs
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v2 1/8] refs/reftable: encapsulate reftable stack
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
@ 2024-11-05  9:11   ` Patrick Steinhardt
  2024-11-12  6:07     ` Junio C Hamano
  2024-11-05  9:12   ` [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
                     ` (6 subsequent siblings)
  7 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:11 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

The reftable ref store needs to keep track of multiple stacks, one for
the main worktree and an arbitrary number of stacks for worktrees. This
is done by storing pointers to `struct reftable_stack`, which we then
access directly.

Wrap the stack in a new `struct reftable_backend`. This will allow us to
attach more data to each respective stack in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 129 +++++++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 56 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index f560bc2b67..116cc5ec23 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -34,6 +34,23 @@
  */
 #define REF_UPDATE_VIA_HEAD (1 << 8)
 
+struct reftable_backend {
+	struct reftable_stack *stack;
+};
+
+static int reftable_backend_init(struct reftable_backend *be,
+				 const char *path,
+				 const struct reftable_write_options *opts)
+{
+	return reftable_new_stack(&be->stack, path, opts);
+}
+
+static void reftable_backend_release(struct reftable_backend *be)
+{
+	reftable_stack_destroy(be->stack);
+	be->stack = NULL;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -41,17 +58,17 @@ struct reftable_ref_store {
 	 * The main stack refers to the common dir and thus contains common
 	 * refs as well as refs of the main repository.
 	 */
-	struct reftable_stack *main_stack;
+	struct reftable_backend main_backend;
 	/*
 	 * The worktree stack refers to the gitdir in case the refdb is opened
 	 * via a worktree. It thus contains the per-worktree refs.
 	 */
-	struct reftable_stack *worktree_stack;
+	struct reftable_backend worktree_backend;
 	/*
 	 * Map of worktree stacks by their respective worktree names. The map
 	 * is populated lazily when we try to resolve `worktrees/$worktree` refs.
 	 */
-	struct strmap worktree_stacks;
+	struct strmap worktree_backends;
 	struct reftable_write_options write_options;
 
 	unsigned int store_flags;
@@ -97,21 +114,21 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_stack *stack_for(struct reftable_ref_store *store,
-					const char *refname,
-					const char **rewritten_ref)
+static struct reftable_backend *backend_for(struct reftable_ref_store *store,
+					    const char *refname,
+					    const char **rewritten_ref)
 {
 	const char *wtname;
 	int wtname_len;
 
 	if (!refname)
-		return store->main_stack;
+		return &store->main_backend;
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_stack *stack;
+		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -125,37 +142,39 @@ static struct reftable_stack *stack_for(struct reftable_ref_store *store,
 		/*
 		 * There is an edge case here: when the worktree references the
 		 * current worktree, then we set up the stack once via
-		 * `worktree_stacks` and once via `worktree_stack`. This is
+		 * `worktree_backends` and once via `worktree_backend`. This is
 		 * wasteful, but in the reading case it shouldn't matter. And
 		 * in the writing case we would notice that the stack is locked
 		 * already and error out when trying to write a reference via
 		 * both stacks.
 		 */
-		stack = strmap_get(&store->worktree_stacks, wtname_buf.buf);
-		if (!stack) {
+		be = strmap_get(&store->worktree_backends, wtname_buf.buf);
+		if (!be) {
 			strbuf_addf(&wt_dir, "%s/worktrees/%s/reftable",
 				    store->base.repo->commondir, wtname_buf.buf);
 
-			store->err = reftable_new_stack(&stack, wt_dir.buf,
-							&store->write_options);
+			CALLOC_ARRAY(be, 1);
+			store->err = reftable_backend_init(be, wt_dir.buf,
+							   &store->write_options);
 			assert(store->err != REFTABLE_API_ERROR);
-			strmap_put(&store->worktree_stacks, wtname_buf.buf, stack);
+
+			strmap_put(&store->worktree_backends, wtname_buf.buf, be);
 		}
 
 		strbuf_release(&wt_dir);
-		return stack;
+		return be;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
 		 * If there is no worktree stack then we're currently in the
 		 * main worktree. We thus return the main stack in that case.
 		 */
-		if (!store->worktree_stack)
-			return store->main_stack;
-		return store->worktree_stack;
+		if (!store->worktree_backend.stack)
+			return &store->main_backend;
+		return &store->worktree_backend;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return store->main_stack;
+		return &store->main_backend;
 	default:
 		BUG("unhandled worktree reference type");
 	}
@@ -292,7 +311,7 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 	umask(mask);
 
 	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
-	strmap_init(&refs->worktree_stacks);
+	strmap_init(&refs->worktree_backends);
 	refs->store_flags = store_flags;
 	refs->log_all_ref_updates = repo_settings_get_log_all_ref_updates(repo);
 
@@ -337,8 +356,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_realpath(&path, gitdir, 0);
 	}
 	strbuf_addstr(&path, "/reftable");
-	refs->err = reftable_new_stack(&refs->main_stack, path.buf,
-				       &refs->write_options);
+	refs->err = reftable_backend_init(&refs->main_backend, path.buf,
+					  &refs->write_options);
 	if (refs->err)
 		goto done;
 
@@ -354,8 +373,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_reset(&path);
 		strbuf_addf(&path, "%s/reftable", gitdir);
 
-		refs->err = reftable_new_stack(&refs->worktree_stack, path.buf,
-					       &refs->write_options);
+		refs->err = reftable_backend_init(&refs->worktree_backend, path.buf,
+						  &refs->write_options);
 		if (refs->err)
 			goto done;
 	}
@@ -374,19 +393,17 @@ static void reftable_be_release(struct ref_store *ref_store)
 	struct strmap_entry *entry;
 	struct hashmap_iter iter;
 
-	if (refs->main_stack) {
-		reftable_stack_destroy(refs->main_stack);
-		refs->main_stack = NULL;
-	}
+	if (refs->main_backend.stack)
+		reftable_backend_release(&refs->main_backend);
+	if (refs->worktree_backend.stack)
+		reftable_backend_release(&refs->worktree_backend);
 
-	if (refs->worktree_stack) {
-		reftable_stack_destroy(refs->worktree_stack);
-		refs->worktree_stack = NULL;
+	strmap_for_each_entry(&refs->worktree_backends, &iter, entry) {
+		struct reftable_backend *be = entry->value;
+		reftable_backend_release(be);
+		free(be);
 	}
-
-	strmap_for_each_entry(&refs->worktree_stacks, &iter, entry)
-		reftable_stack_destroy(entry->value);
-	strmap_clear(&refs->worktree_stacks, 0);
+	strmap_clear(&refs->worktree_backends, 0);
 }
 
 static int reftable_be_create_on_disk(struct ref_store *ref_store,
@@ -781,7 +798,7 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 		required_flags |= REF_STORE_ODB;
 	refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin");
 
-	main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix,
+	main_iter = ref_iterator_for_stack(refs, refs->main_backend.stack, prefix,
 					   exclude_patterns, flags);
 
 	/*
@@ -789,14 +806,14 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 	 * right now. If we aren't, then we return the common reftable
 	 * iterator, only.
 	 */
-	 if (!refs->worktree_stack)
+	 if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
 	/*
 	 * Otherwise we merge both the common and the per-worktree refs into a
 	 * single iterator.
 	 */
-	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix,
+	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_backend.stack, prefix,
 					       exclude_patterns, flags);
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -811,7 +828,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	int ret;
 
 	if (refs->err < 0)
@@ -838,7 +855,7 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
 	int ret;
 
@@ -898,7 +915,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = stack_for(refs, update->refname, NULL);
+	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
 	size_t i;
 	int ret;
@@ -1031,7 +1048,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, stack_for(refs, "HEAD", NULL), "HEAD",
+	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1043,7 +1060,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = stack_for(refs, u->refname, &rewritten_ref);
+		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1525,9 +1542,9 @@ static int reftable_be_pack_refs(struct ref_store *ref_store,
 	if (refs->err)
 		return refs->err;
 
-	stack = refs->worktree_stack;
+	stack = refs->worktree_backend.stack;
 	if (!stack)
-		stack = refs->main_stack;
+		stack = refs->main_backend.stack;
 
 	if (opts->flags & PACK_REFS_AUTO)
 		ret = reftable_stack_auto_compact(stack);
@@ -1782,7 +1799,7 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1814,7 +1831,7 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1952,11 +1969,11 @@ static struct ref_iterator *reftable_be_reflog_iterator_begin(struct ref_store *
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_iterator_begin");
 	struct reftable_reflog_iterator *main_iter, *worktree_iter;
 
-	main_iter = reflog_iterator_for_stack(refs, refs->main_stack);
-	if (!refs->worktree_stack)
+	main_iter = reflog_iterator_for_stack(refs, refs->main_backend.stack);
+	if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
-	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_stack);
+	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_backend.stack);
 
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -1995,7 +2012,7 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2035,7 +2052,7 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
 	size_t logs_alloc = 0, logs_nr = 0, i;
@@ -2084,7 +2101,7 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2169,7 +2186,7 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -2243,7 +2260,7 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_delete_arg arg = {
 		.stack = stack,
 		.refname = refname,
@@ -2352,7 +2369,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
  2024-11-05  9:11   ` [PATCH v2 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  2024-11-12  6:41     ` Junio C Hamano
  2024-11-05  9:12   ` [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
                     ` (5 subsequent siblings)
  7 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

When accessing a stack we almost always have to reload the stack before
reading data from it. This is mostly because Git does not have a
notification mechanism for when underlying data has been changed, and
thus we are forced to opportunistically reload the stack every single
time to account for any changes that may have happened concurrently.

Handle the reload internally in `backend_for()`. For one, this forces
callsites to think about whether or not they need to reload the stack.
And second, it makes the logic to access stacks more self-contained by
letting each `struct reftable_backend` manage itself.
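
As a sketch of the resulting caller pattern (names as in the diff
below), the trailing flag decides whether the stack is reloaded before
being handed out:

    struct reftable_backend *be;
    int ret;

    /* Read path: reload so that concurrent writes are picked up. */
    ret = backend_for(&be, refs, refname, &refname, 1);
    if (ret)
            return ret;
    /* `be->stack` is now known to be up to date. */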

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 150 +++++++++++++++++++++++++---------------
 1 file changed, 93 insertions(+), 57 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 116cc5ec23..4a28dc8a9d 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -114,21 +114,25 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_backend *backend_for(struct reftable_ref_store *store,
-					    const char *refname,
-					    const char **rewritten_ref)
+static int backend_for(struct reftable_backend **out,
+		       struct reftable_ref_store *store,
+		       const char *refname,
+		       const char **rewritten_ref,
+		       int reload)
 {
+	struct reftable_backend *be;
 	const char *wtname;
 	int wtname_len;
 
-	if (!refname)
-		return &store->main_backend;
+	if (!refname) {
+		be = &store->main_backend;
+		goto out;
+	}
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -162,7 +166,7 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		}
 
 		strbuf_release(&wt_dir);
-		return be;
+		goto out;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
@@ -170,14 +174,27 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		 * main worktree. We thus return the main stack in that case.
 		 */
 		if (!store->worktree_backend.stack)
-			return &store->main_backend;
-		return &store->worktree_backend;
+			be = &store->main_backend;
+		else
+			be = &store->worktree_backend;
+		goto out;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return &store->main_backend;
+		be = &store->main_backend;
+		goto out;
 	default:
 		BUG("unhandled worktree reference type");
 	}
+
+out:
+	if (reload) {
+		int ret = reftable_stack_reload(be->stack);
+		if (ret)
+			return ret;
+	}
+	*out = be;
+
+	return 0;
 }
 
 static int should_write_log(struct reftable_ref_store *refs, const char *refname)
@@ -828,17 +845,17 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, stack, refname, oid, referent, type);
+	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -855,15 +872,15 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
+	struct reftable_backend *be;
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
 	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
 		strbuf_addstr(referent, ref.value.symref);
 	else
@@ -880,7 +897,7 @@ struct reftable_transaction_update {
 
 struct write_transaction_table_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	struct reftable_addition *addition;
 	struct reftable_transaction_update *updates;
 	size_t updates_nr;
@@ -915,27 +932,31 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
+	struct reftable_backend *be;
 	size_t i;
 	int ret;
 
+	ret = backend_for(&be, refs, update->refname, NULL, 0);
+	if (ret)
+		return ret;
+
 	/*
 	 * Search for a preexisting stack update. If there is one then we add
 	 * the update to it, otherwise we set up a new stack update.
 	 */
 	for (i = 0; !arg && i < tx_data->args_nr; i++)
-		if (tx_data->args[i].stack == stack)
+		if (tx_data->args[i].be == be)
 			arg = &tx_data->args[i];
 
 	if (!arg) {
 		struct reftable_addition *addition;
 
-		ret = reftable_stack_reload(stack);
+		ret = backend_for(&be, refs, update->refname, NULL, 1);
 		if (ret)
 			return ret;
 
-		ret = reftable_stack_new_addition(&addition, stack,
+		ret = reftable_stack_new_addition(&addition, be->stack,
 						  REFTABLE_STACK_NEW_ADDITION_RELOAD);
 		if (ret) {
 			if (ret == REFTABLE_LOCK_ERROR)
@@ -947,7 +968,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 			   tx_data->args_alloc);
 		arg = &tx_data->args[tx_data->args_nr++];
 		arg->refs = refs;
-		arg->stack = stack;
+		arg->be = be;
 		arg->addition = addition;
 		arg->updates = NULL;
 		arg->updates_nr = 0;
@@ -1002,6 +1023,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	struct strbuf referent = STRBUF_INIT, head_referent = STRBUF_INIT;
 	struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
 	struct reftable_transaction_data *tx_data = NULL;
+	struct reftable_backend *be;
 	struct object_id head_oid;
 	unsigned int head_type = 0;
 	size_t i;
@@ -1048,7 +1070,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
+	ret = backend_for(&be, refs, "HEAD", NULL, 0);
+	if (ret)
+		goto done;
+
+	ret = read_ref_without_reload(refs, be->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1057,10 +1083,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	for (i = 0; i < transaction->nr; i++) {
 		struct ref_update *u = transaction->updates[i];
 		struct object_id current_oid = {0};
-		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
+		ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0);
+		if (ret)
+			goto done;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1116,7 +1143,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, stack, rewritten_ref,
+		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
 					      &current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
@@ -1318,7 +1345,7 @@ static int transaction_update_cmp(const void *a, const void *b)
 static int write_transaction_table(struct reftable_writer *writer, void *cb_data)
 {
 	struct write_transaction_table_arg *arg = cb_data;
-	uint64_t ts = reftable_stack_next_update_index(arg->stack);
+	uint64_t ts = reftable_stack_next_update_index(arg->be->stack);
 	struct reftable_log_record *logs = NULL;
 	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -1354,7 +1381,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			struct reftable_log_record log = {0};
 			struct reftable_iterator it = {0};
 
-			ret = reftable_stack_init_log_iterator(arg->stack, &it);
+			ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 			if (ret < 0)
 				goto done;
 
@@ -1799,10 +1826,9 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1814,10 +1840,11 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1831,10 +1858,9 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1845,10 +1871,11 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -2012,15 +2039,19 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2052,16 +2083,20 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	size_t logs_alloc = 0, logs_nr = 0, i;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2101,20 +2136,20 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	ret = refs->err;
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2186,10 +2221,9 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
@@ -2198,11 +2232,12 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		goto done;
+	arg.stack = be->stack;
 
-	ret = reftable_stack_add(stack, &write_reflog_existence_table, &arg);
+	ret = reftable_stack_add(be->stack, &write_reflog_existence_table, &arg);
 
 done:
 	return ret;
@@ -2260,17 +2295,18 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_delete_arg arg = {
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
-	ret = reftable_stack_add(stack, &write_reflog_delete_table, &arg);
+	arg.stack = be->stack;
+
+	ret = reftable_stack_add(be->stack, &write_reflog_delete_table, &arg);
 
 	assert(ret != REFTABLE_API_ERROR);
 	return ret;
@@ -2369,13 +2405,13 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
+	struct reftable_backend *be;
 	struct object_id oid = {0};
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -2384,11 +2420,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2396,11 +2432,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_new_addition(&add, stack, 0);
+	ret = reftable_stack_new_addition(&add, be->stack, 0);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref_record);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
 	if (ret < 0)
 		goto done;
 	if (reftable_ref_record_val1(&ref_record))
@@ -2479,7 +2515,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	arg.refs = refs;
 	arg.records = rewritten;
 	arg.len = logs_nr;
-	arg.stack = stack,
+	arg.stack = be->stack,
 	arg.refname = refname,
 
 	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend`
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
  2024-11-05  9:11   ` [PATCH v2 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
  2024-11-05  9:12   ` [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  2024-11-12  7:26     ` Junio C Hamano
  2024-11-05  9:12   ` [PATCH v2 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
                     ` (4 subsequent siblings)
  7 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Refactor `read_ref_without_reload()` to accept a `struct
reftable_backend` as input instead of a `struct reftable_stack`. This
allows us to implement an additional caching layer when reading refs
that lets us reuse reftable iterators.
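
For reference, a small usage sketch of the new helper (a hypothetical
caller; the signature matches the diff below):

    struct object_id oid;
    struct strbuf referent = STRBUF_INIT;
    unsigned int type = 0;

    /*
     * Fills in `referent` for symbolic refs and `oid` for direct
     * refs, setting REF_ISSYMREF in `type` for the former.
     */
    if (!reftable_backend_read_ref(be, "HEAD", &oid, &referent, &type) &&
        (type & REF_ISSYMREF))
            ; /* referent.buf holds the symref target */

    strbuf_release(&referent);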

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
 reftable/reftable-stack.h |   3 ++
 reftable/stack.c          |   5 ++
 3 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 4a28dc8a9d..230adb690d 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
 	be->stack = NULL;
 }
 
+static int reftable_backend_read_ref(struct reftable_backend *be,
+				     const char *refname,
+				     struct object_id *oid,
+				     struct strbuf *referent,
+				     unsigned int *type)
+{
+	struct reftable_ref_record ref = {0};
+	int ret;
+
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (ret)
+		goto done;
+
+	if (ref.value_type == REFTABLE_REF_SYMREF) {
+		strbuf_reset(referent);
+		strbuf_addstr(referent, ref.value.symref);
+		*type |= REF_ISSYMREF;
+	} else if (reftable_ref_record_val1(&ref)) {
+		unsigned int hash_id;
+
+		switch (reftable_stack_hash_id(be->stack)) {
+		case REFTABLE_HASH_SHA1:
+			hash_id = GIT_HASH_SHA1;
+			break;
+		case REFTABLE_HASH_SHA256:
+			hash_id = GIT_HASH_SHA256;
+			break;
+		default:
+			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
+		}
+
+		oidread(oid, reftable_ref_record_val1(&ref),
+			&hash_algos[hash_id]);
+	} else {
+		/* We got a tombstone, which should not happen. */
+		BUG("unhandled reference value type %d", ref.value_type);
+	}
+
+done:
+	assert(ret != REFTABLE_API_ERROR);
+	reftable_ref_record_release(&ref);
+	return ret;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -243,38 +287,6 @@ static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_ref_store *refs,
-				   struct reftable_stack *stack,
-				   const char *refname,
-				   struct object_id *oid,
-				   struct strbuf *referent,
-				   unsigned int *type)
-{
-	struct reftable_ref_record ref = {0};
-	int ret;
-
-	ret = reftable_stack_read_ref(stack, refname, &ref);
-	if (ret)
-		goto done;
-
-	if (ref.value_type == REFTABLE_REF_SYMREF) {
-		strbuf_reset(referent);
-		strbuf_addstr(referent, ref.value.symref);
-		*type |= REF_ISSYMREF;
-	} else if (reftable_ref_record_val1(&ref)) {
-		oidread(oid, reftable_ref_record_val1(&ref),
-			refs->base.repo->hash_algo);
-	} else {
-		/* We got a tombstone, which should not happen. */
-		BUG("unhandled reference value type %d", ref.value_type);
-	}
-
-done:
-	assert(ret != REFTABLE_API_ERROR);
-	reftable_ref_record_release(&ref);
-	return ret;
-}
-
 static int reftable_be_config(const char *var, const char *value,
 			      const struct config_context *ctx,
 			      void *_opts)
@@ -855,7 +867,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
+	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1074,8 +1086,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(refs, be->stack, "HEAD",
-				      &head_oid, &head_referent, &head_type);
+	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
+					&head_referent, &head_type);
 	if (ret < 0)
 		goto done;
 	ret = 0;
@@ -1143,8 +1155,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
-					      &current_oid, &referent, &u->type);
+		ret = reftable_backend_read_ref(be, rewritten_ref,
+						&current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
 		if (ret > 0 && !ref_update_expects_existing_old_ref(u)) {
@@ -1602,7 +1614,7 @@ struct write_create_symref_arg {
 
 struct write_copy_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	const char *oldname;
 	const char *newname;
 	const char *logmsg;
@@ -1627,7 +1639,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
 		BUG("failed splitting committer info");
 
-	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
+	if (reftable_stack_read_ref(arg->be->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
 	}
@@ -1666,7 +1678,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * the old branch and the creation of the new branch, and we cannot do
 	 * two changes to a reflog in a single update.
 	 */
-	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack);
+	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->be->stack);
 	if (arg->delete_old)
 		creation_ts++;
 	reftable_writer_set_limits(writer, deletion_ts, creation_ts);
@@ -1709,8 +1721,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
-		ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid,
-					      &head_referent, &head_type);
+		ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid,
+						&head_referent, &head_type);
 		if (ret < 0)
 			goto done;
 		append_head_reflog = (head_type & REF_ISSYMREF) && !strcmp(head_referent.buf, arg->oldname);
@@ -1753,7 +1765,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * copy over all log entries from the old reflog. Last but not least,
 	 * when renaming we also have to delete all the old reflog entries.
 	 */
-	ret = reftable_stack_init_log_iterator(arg->stack, &it);
+	ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -1826,7 +1838,6 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1840,11 +1851,10 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1858,7 +1868,6 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1871,11 +1880,10 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
index 54787f2ef5..ae14270ea7 100644
--- a/reftable/reftable-stack.h
+++ b/reftable/reftable-stack.h
@@ -149,4 +149,7 @@ struct reftable_compaction_stats {
 struct reftable_compaction_stats *
 reftable_stack_compaction_stats(struct reftable_stack *st);
 
+/* Return the hash of the stack. */
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
+
 #endif
diff --git a/reftable/stack.c b/reftable/stack.c
index 1fffd75630..d97b64a40d 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1791,3 +1791,8 @@ int reftable_stack_clean(struct reftable_stack *st)
 	reftable_addition_destroy(add);
 	return err;
 }
+
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
+{
+	return reftable_merged_table_hash_id(st->merged);
+}
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
                     ` (2 preceding siblings ...)
  2024-11-05  9:12   ` [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  2024-11-05  9:12   ` [PATCH v2 5/8] refs/reftable: refactor reflog expiry " Patrick Steinhardt
                     ` (3 subsequent siblings)
  7 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Refactor the callback function that reads symbolic references in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 230adb690d..dfe94ff969 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -884,21 +884,18 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_ref_record ref = {0};
 	struct reftable_backend *be;
+	struct object_id oid;
+	unsigned int type = 0;
 	int ret;
 
 	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
-	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
-		strbuf_addstr(referent, ref.value.symref);
-	else
+	ret = reftable_backend_read_ref(be, refname, &oid, referent, &type);
+	if (type != REF_ISSYMREF)
 		ret = -1;
-
-	reftable_ref_record_release(&ref);
 	return ret;
 }
 
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 5/8] refs/reftable: refactor reflog expiry to use reftable backend
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
                     ` (3 preceding siblings ...)
  2024-11-05  9:12   ` [PATCH v2 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  2024-11-05  9:12   ` [PATCH v2 6/8] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
                     ` (2 subsequent siblings)
  7 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Refactor the callback function that expires reflog entries in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index dfe94ff969..9c6e9c8374 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2412,14 +2412,15 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
-	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
 	struct reftable_backend *be;
 	struct object_id oid = {0};
+	struct strbuf referent = STRBUF_INIT;
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	unsigned int type = 0;
 	int ret;
 
 	if (refs->err < 0)
@@ -2441,12 +2442,9 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
+	ret = reftable_backend_read_ref(be, refname, &oid, &referent, &type);
 	if (ret < 0)
 		goto done;
-	if (reftable_ref_record_val1(&ref_record))
-		oidread(&oid, reftable_ref_record_val1(&ref_record),
-			ref_store->repo->hash_algo);
 	prepare_fn(refname, &oid, policy_cb_data);
 
 	while (1) {
@@ -2513,8 +2511,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		}
 	}
 
-	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash &&
-	    reftable_ref_record_val1(&ref_record))
+	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash && !is_null_oid(&oid))
 		oidread(&arg.update_oid, last_hash, ref_store->repo->hash_algo);
 
 	arg.refs = refs;
@@ -2539,11 +2536,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		cleanup_fn(policy_cb_data);
 	assert(ret != REFTABLE_API_ERROR);
 
-	reftable_ref_record_release(&ref_record);
 	reftable_iterator_destroy(&it);
 	reftable_addition_destroy(add);
 	for (i = 0; i < logs_nr; i++)
 		reftable_log_record_release(&logs[i]);
+	strbuf_release(&referent);
 	free(logs);
 	free(rewritten);
 	return ret;
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 6/8] reftable/stack: add mechanism to notify callers on reload
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
                     ` (4 preceding siblings ...)
  2024-11-05  9:12   ` [PATCH v2 5/8] refs/reftable: refactor reflog expiry " Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  2024-11-05  9:12   ` [PATCH v2 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
  2024-11-05  9:12   ` [PATCH v2 8/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  7 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

Reftable stacks are reloaded in two cases:

  - When calling `reftable_stack_reload()`, if the stat-cache tells us
    that the stack has been modified.

  - When committing a reftable addition.

While callers can figure out the second case, they do not have a
mechanism to figure out whether `reftable_stack_reload()` led to an
actual reload of the on-disk data. Thus all they can do is assume that
data is always being reloaded in that case.

Improve the situation by introducing a new `on_reload()` callback to the
reftable options. If provided, the function will be invoked every time
the stack has indeed been reloaded. This allows callers to invalidate
data that depends on the current stack data.
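
As an illustration, a caller could wire the callback up along these
lines. This is a minimal sketch, not part of the patch;
`invalidate_cache`, `my_cache` and the surrounding variables are made
up for the example:

    static void invalidate_cache(void *payload)
    {
    	struct my_cache *cache = payload;
    	/* Drop any state derived from the old stack's tables. */
    	my_cache_clear(cache);
    }

    struct reftable_write_options opts = *base_opts;
    opts.on_reload = invalidate_cache;
    opts.on_reload_payload = cache;
    err = reftable_new_stack(&stack, path, &opts);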

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reftable-writer.h | 9 +++++++++
 reftable/stack.c           | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h
index c85ef5a5bd..5f9afa620b 100644
--- a/reftable/reftable-writer.h
+++ b/reftable/reftable-writer.h
@@ -68,6 +68,15 @@ struct reftable_write_options {
 	 * fsync(3P) when unset.
 	 */
 	int (*fsync)(int fd);
+
+	/*
+	 * Callback function to execute whenever the stack is being reloaded.
+	 * This can be used e.g. to discard cached information that relies on
+	 * the old stack's data. The payload data will be passed as argument to
+	 * the callback.
+	 */
+	void (*on_reload)(void *payload);
+	void *on_reload_payload;
 };
 
 /* reftable_block_stats holds statistics for a single block type */
diff --git a/reftable/stack.c b/reftable/stack.c
index d97b64a40d..5384ca9de0 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -548,6 +548,10 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
 		close(fd);
 	free_names(names);
 	free_names(names_after);
+
+	if (st->opts.on_reload)
+		st->opts.on_reload(st->opts.on_reload_payload);
+
 	return err;
 }
 
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 7/8] reftable/merged: drain priority queue on reseek
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
                     ` (5 preceding siblings ...)
  2024-11-05  9:12   ` [PATCH v2 6/8] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  2024-11-05  9:12   ` [PATCH v2 8/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  7 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

In 5bf96e0c39 (reftable/generic: move seeking of records into the
iterator, 2024-05-13) we have refactored the reftable codebase such that
iterators can be initialized once and then re-seeked multiple times.
This feature is used by 1869525066 (refs/reftable: wire up support for
exclude patterns, 2024-09-16) in order to skip records based on exclude
patterns provided by the caller.

The logic to re-seek the merged iterator is insufficient though because
we don't drain the priority queue on a re-seek. This means that the
queue may contain stale entries and thus reading the next record in the
queue will return the wrong entry. While this is an obvious bug, it is
harmless in the context of the above exclude patterns:

  - If the queue contained stale entries that match the pattern then the
    caller would already know to filter out such refs. This is because
    our codebase is prepared to handle backends that don't have a way to
    efficiently implement exclude patterns.

  - If the queue contained stale entries that don't match the pattern
    we'd eventually filter out any duplicates. This is because the
    reftable code discards items with the same ref name and sorts any
    remaining entries properly.

So things happen to work in this context regardless of the bug, and
there is no other use case yet where we re-seek iterators. We're about
to introduce a caching mechanism though where iterators are reused by
the reftable backend, and that will expose the bug.

Fix the issue by draining the priority queue when seeking and add a
testcase that surfaces the issue.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/merged.c                |  2 +
 t/unit-tests/t-reftable-merged.c | 73 ++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/reftable/merged.c b/reftable/merged.c
index 5b93e20f42..bb0836e344 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -66,6 +66,8 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want
 	int err;
 
 	mi->advance_index = -1;
+	while (!merged_iter_pqueue_is_empty(mi->pq))
+		merged_iter_pqueue_remove(&mi->pq);
 
 	for (size_t i = 0; i < mi->subiters_len; i++) {
 		err = iterator_seek(&mi->subiters[i].iter, want);
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 2591b5e597..a12bd0e1a3 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -273,6 +273,78 @@ static void t_merged_seek_multiple_times(void)
 	reftable_free(sources);
 }
 
+static void t_merged_seek_multiple_times_without_draining(void)
+{
+	struct reftable_ref_record r1[] = {
+		{
+			.refname = (char *) "a",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 1 },
+		},
+		{
+			.refname = (char *) "c",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 2 },
+		}
+	};
+	struct reftable_ref_record r2[] = {
+		{
+			.refname = (char *) "b",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 3 },
+		},
+		{
+			.refname = (char *) "d",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 4 },
+		},
+	};
+	struct reftable_ref_record *refs[] = {
+		r1, r2,
+	};
+	size_t sizes[] = {
+		ARRAY_SIZE(r1), ARRAY_SIZE(r2),
+	};
+	struct reftable_buf bufs[] = {
+		REFTABLE_BUF_INIT, REFTABLE_BUF_INIT,
+	};
+	struct reftable_block_source *sources = NULL;
+	struct reftable_reader **readers = NULL;
+	struct reftable_ref_record rec = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_merged_table *mt;
+	int err;
+
+	mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2);
+	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
+
+	err = reftable_iterator_seek_ref(&it, "b");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
+	check(err == 1);
+
+	err = reftable_iterator_seek_ref(&it, "a");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
+	check(err == 1);
+
+	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
+		reftable_buf_release(&bufs[i]);
+	readers_destroy(readers, ARRAY_SIZE(refs));
+	reftable_ref_record_release(&rec);
+	reftable_iterator_destroy(&it);
+	reftable_merged_table_free(mt);
+	reftable_free(sources);
+}
+
 static struct reftable_merged_table *
 merged_table_from_log_records(struct reftable_log_record **logs,
 			      struct reftable_block_source **source,
@@ -467,6 +539,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
 	TEST(t_merged_logs(), "merged table with multiple log updates for same ref");
 	TEST(t_merged_refs(), "merged table with multiple updates to same ref");
 	TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times");
+	TEST(t_merged_seek_multiple_times_without_draining(), "merged table can seek multiple times without draining");
 	TEST(t_merged_single_record(), "ref occurring in only one record can be fetched");
 
 	return test_done();
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v2 8/8] refs/reftable: reuse iterators when reading refs
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
                     ` (6 preceding siblings ...)
  2024-11-05  9:12   ` [PATCH v2 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
@ 2024-11-05  9:12   ` Patrick Steinhardt
  7 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-05  9:12 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano

When reading references the reftable backend has to:

  1. Create a new ref iterator.

  2. Seek the iterator to the record we're searching for.

  3. Read the record.

We cannot really avoid the last two steps, but re-creating the iterator
every single time we want to read a reference is kind of expensive and a
waste of resources. We couldn't help it in the past though because it
was not possible to reuse iterators. But starting with 5bf96e0c39
(reftable/generic: move seeking of records into the iterator,
2024-05-13) we have split up the iterator lifecycle such that creating
the iterator and seeking are two different concerns.

Refactor the code such that we cache iterators in the reftable backend.
This cache is invalidated whenever the respective stack is reloaded such
that we know to recreate the iterator in that case. This leads to a
sizeable speedup when creating many refs, which requires a lot of random
reference reads:

    Benchmark 1: update-ref: create many refs (refcount = 100000, revision = master)
      Time (mean ± σ):      1.793 s ±  0.010 s    [User: 0.954 s, System: 0.835 s]
      Range (min … max):    1.781 s …  1.811 s    10 runs

    Benchmark 2: update-ref: create many refs (refcount = 100000, revision = HEAD)
      Time (mean ± σ):      1.680 s ±  0.013 s    [User: 0.846 s, System: 0.831 s]
      Range (min … max):    1.664 s …  1.702 s    10 runs

    Summary
      update-ref: create many refs (refcount = 100000, revision = HEAD) ran
        1.07 ± 0.01 times faster than update-ref: create many refs (refcount = 100000, revision = master)

While 7% is not a huge win, you have to consider that the benchmark is
_writing_ data, so _reading_ references is only one part of what we do.
Flame graphs show that we spend around 40% of our time reading refs, so
the speedup when reading refs is approximately ~2.5x that. I could not
find better benchmarks where we perform a lot of random ref reads.

You can also see a sizeable impact on memory usage when creating 100k
references. Before this change:

    HEAP SUMMARY:
        in use at exit: 19,112,538 bytes in 200,170 blocks
      total heap usage: 8,400,426 allocs, 8,200,256 frees, 454,367,048 bytes allocated

After this change:

    HEAP SUMMARY:
        in use at exit: 674,416 bytes in 169 blocks
      total heap usage: 7,929,872 allocs, 7,929,703 frees, 281,509,985 bytes allocated

As an additional factor, this refactoring opens up the possibility for
more performance optimizations in how we re-seek iterators. Any change
that allows us to optimize re-seeking by e.g. reusing data structures
would thus also directly speed up random reads.
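
The gist of the read path after this change is the following pattern,
shown here as a condensed sketch of the code added below (error
handling omitted):

    /* Lazily create the iterator; a stack reload discards it again. */
    if (!be->it.ops)
    	reftable_stack_init_ref_iterator(be->stack, &be->it);

    /* Each read then merely re-seeks the cached iterator. */
    reftable_iterator_seek_ref(&be->it, refname);
    reftable_iterator_next_ref(&be->it, &ref);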

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 9c6e9c8374..4942363712 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -36,19 +36,30 @@
 
 struct reftable_backend {
 	struct reftable_stack *stack;
+	struct reftable_iterator it;
 };
 
+static void reftable_backend_on_reload(void *payload)
+{
+	struct reftable_backend *be = payload;
+	reftable_iterator_destroy(&be->it);
+}
+
 static int reftable_backend_init(struct reftable_backend *be,
 				 const char *path,
-				 const struct reftable_write_options *opts)
+				 const struct reftable_write_options *_opts)
 {
-	return reftable_new_stack(&be->stack, path, opts);
+	struct reftable_write_options opts = *_opts;
+	opts.on_reload = reftable_backend_on_reload;
+	opts.on_reload_payload = be;
+	return reftable_new_stack(&be->stack, path, &opts);
 }
 
 static void reftable_backend_release(struct reftable_backend *be)
 {
 	reftable_stack_destroy(be->stack);
 	be->stack = NULL;
+	reftable_iterator_destroy(&be->it);
 }
 
 static int reftable_backend_read_ref(struct reftable_backend *be,
@@ -60,10 +71,25 @@ static int reftable_backend_read_ref(struct reftable_backend *be,
 	struct reftable_ref_record ref = {0};
 	int ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (!be->it.ops) {
+		ret = reftable_stack_init_ref_iterator(be->stack, &be->it);
+		if (ret)
+			goto done;
+	}
+
+	ret = reftable_iterator_seek_ref(&be->it, refname);
 	if (ret)
 		goto done;
 
+	ret = reftable_iterator_next_ref(&be->it, &ref);
+	if (ret)
+		goto done;
+
+	if (strcmp(ref.refname, refname)) {
+		ret = 1;
+		goto done;
+	}
+
 	if (ref.value_type == REFTABLE_REF_SYMREF) {
 		strbuf_reset(referent);
 		strbuf_addstr(referent, ref.value.symref);
-- 
2.47.0.229.g8f8d6eee53.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH 1/8] refs/reftable: encapsulate reftable stack
  2024-11-04 15:11 ` [PATCH 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
@ 2024-11-05 11:03   ` karthik nayak
  0 siblings, 0 replies; 57+ messages in thread
From: karthik nayak @ 2024-11-05 11:03 UTC (permalink / raw)
  To: Patrick Steinhardt, git

Patrick Steinhardt <ps@pks.im> writes:

> The reftable ref store needs to keep track of multiple stacks, one for
> the main worktree and an arbitrary number of stacks for worktrees. This
> is done by storing pointers to `struct reftable_stack`, which we then
> access directly.
>
> Wrap the stack in a new `struct reftable_backend`. This will allow us to
> attach more data to each respective stack in subsequent commits.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  refs/reftable-backend.c | 129 +++++++++++++++++++++++-----------------
>  1 file changed, 73 insertions(+), 56 deletions(-)
>
> diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
> index 38eb14d591..43cba53cb1 100644
> --- a/refs/reftable-backend.c
> +++ b/refs/reftable-backend.c
> @@ -32,6 +32,23 @@
>   */
>  #define REF_UPDATE_VIA_HEAD (1 << 8)
>
> +struct reftable_backend {
> +	struct reftable_stack *stack;
> +};
> +
> +static int reftable_backend_init(struct reftable_backend *be,
> +				 const char *path,
> +				 const struct reftable_write_options *opts)
> +{
> +	return reftable_new_stack(&be->stack, path, opts);
> +}
> +
> +static void reftable_backend_release(struct reftable_backend *be)
> +{
> +	reftable_stack_destroy(be->stack);
> +	be->stack = NULL;
> +}
> +
>  struct reftable_ref_store {
>  	struct ref_store base;
>
> @@ -39,17 +56,17 @@ struct reftable_ref_store {
>  	 * The main stack refers to the common dir and thus contains common
>  	 * refs as well as refs of the main repository.
>  	 */

Shouldn't these comments be updated to say s/stack/backend? While the
backend contains the stack, it is confusing to read "stack" and then
see "backend".

> -	struct reftable_stack *main_stack;
> +	struct reftable_backend main_backend;
>  	/*
>  	 * The worktree stack refers to the gitdir in case the refdb is opened
>  	 * via a worktree. It thus contains the per-worktree refs.
>  	 */

Here too.

> -	struct reftable_stack *worktree_stack;
> +	struct reftable_backend worktree_backend;
>  	/*
>  	 * Map of worktree stacks by their respective worktree names. The map
>  	 * is populated lazily when we try to resolve `worktrees/$worktree` refs.
>  	 */

Here too.

> -	struct strmap worktree_stacks;
> +	struct strmap worktree_backends;
>  	struct reftable_write_options write_options;
>
>  	unsigned int store_flags;

[snip]

> @@ -772,14 +789,14 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
>  	 * right now. If we aren't, then we return the common reftable
>  	 * iterator, only.
>  	 */
> -	 if (!refs->worktree_stack)
> +	 if (!refs->worktree_backend.stack)

Nit: Not your fault, but this is misaligned, no?

>  		return &main_iter->base;
>

[snip]

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-04 15:11 ` [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-05 11:14   ` karthik nayak
  2024-11-06 10:43     ` Patrick Steinhardt
  0 siblings, 1 reply; 57+ messages in thread
From: karthik nayak @ 2024-11-05 11:14 UTC (permalink / raw)
  To: Patrick Steinhardt, git

Patrick Steinhardt <ps@pks.im> writes:

[snip]

> @@ -898,27 +915,31 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
>  				      struct ref_update *update,
>  				      struct strbuf *err)
>  {
> -	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
>  	struct write_transaction_table_arg *arg = NULL;
> +	struct reftable_backend *be;
>  	size_t i;
>  	int ret;
>
> +	ret = backend_for(&be, refs, update->refname, NULL, 0);

So here we don't reload the stack. It would be nice to add a comment
explaining why in the places where we don't. Here, it seems to be
because we possibly already have an update which would have pushed us
to reload the stack.

> +	if (ret)
> +		return ret;
> +

[snip]

> @@ -1995,15 +2022,19 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
>  {
>  	struct reftable_ref_store *refs =
>  		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
> -	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
>  	struct reftable_log_record log = {0};
>  	struct reftable_iterator it = {0};
> +	struct reftable_backend *be;
>  	int ret;
>
>  	if (refs->err < 0)
>  		return refs->err;
>
> -	ret = reftable_stack_init_log_iterator(stack, &it);
> +	ret = backend_for(&be, refs, refname, &refname, 0);

Like here, I'm not entirely sure why we don't reload the stack.

> +	if (ret)
> +		goto done;
> +
> +	ret = reftable_stack_init_log_iterator(be->stack, &it);
>  	if (ret < 0)
>  		goto done;
>

[snip]

> @@ -2462,7 +2498,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
>  	arg.refs = refs;
>  	arg.records = rewritten;
>  	arg.len = logs_nr;
> -	arg.stack = stack,
> +	arg.stack = be->stack,
>  	arg.refname = refname,
>

Shouldn't these lines end with ';'?

>  	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);
> --
> 2.47.0.229.g8f8d6eee53.dirty

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 3/8] refs/reftable: read references via `struct reftable_backend`
  2024-11-04 15:11 ` [PATCH 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-05 11:20   ` karthik nayak
  0 siblings, 0 replies; 57+ messages in thread
From: karthik nayak @ 2024-11-05 11:20 UTC (permalink / raw)
  To: Patrick Steinhardt, git

Patrick Steinhardt <ps@pks.im> writes:

> Refactor `read_ref_without_reload()` to accept a `struct reftable_stack`

Do you mean `struct reftable_backend` here?

> as input instead of accepting a `struct reftable_stack`. This allows us
> to implement an additional caching layer when reading refs where we can
> reuse reftable iterators.
>

Nit: This commit also refactors the function to use the newly defined
'REFTABLE_HASH_*'. Maybe worth mentioning in the commit.

The patch itself looks good!

[snip]

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-05 11:14   ` karthik nayak
@ 2024-11-06 10:43     ` Patrick Steinhardt
  0 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-06 10:43 UTC (permalink / raw)
  To: karthik nayak; +Cc: git

On Tue, Nov 05, 2024 at 05:14:15AM -0600, karthik nayak wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> [snip]
> 
> > @@ -898,27 +915,31 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
> >  				      struct ref_update *update,
> >  				      struct strbuf *err)
> >  {
> > -	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
> >  	struct write_transaction_table_arg *arg = NULL;
> > +	struct reftable_backend *be;
> >  	size_t i;
> >  	int ret;
> >
> > +	ret = backend_for(&be, refs, update->refname, NULL, 0);
> 
> So here, we don't reload the stack, it would be nice to add a comment
> why, in the places we don't. Here, it seems to be because we possibly
> already have an update which would have pushed us to reload the stack.

I'll add comments.

> > @@ -2462,7 +2498,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
> >  	arg.refs = refs;
> >  	arg.records = rewritten;
> >  	arg.len = logs_nr;
> > -	arg.stack = stack,
> > +	arg.stack = be->stack,
> >  	arg.refname = refname,
> >
> 
> Shouldn't these lines end with ';'?

Yup, they should. It's harmless, but certainly confusing.

Patrick

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 1/8] refs/reftable: encapsulate reftable stack
  2024-11-05  9:11   ` [PATCH v2 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
@ 2024-11-12  6:07     ` Junio C Hamano
  0 siblings, 0 replies; 57+ messages in thread
From: Junio C Hamano @ 2024-11-12  6:07 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git

Patrick Steinhardt <ps@pks.im> writes:

> +static void reftable_backend_release(struct reftable_backend *be)
> +{
> +	reftable_stack_destroy(be->stack);
> +	be->stack = NULL;
> +}

OK, this turned out to be the required reading for the remainder of
the patch ;-).  Everything else was pretty much a mechanical
replacement from reftable_stack to reftable_backend, but to get rid
of the latter, we need to do this _release() thing where we used to
do the _destroy on a bare reftable_stack instance.

Looking good and understandable.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-05  9:12   ` [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-12  6:41     ` Junio C Hamano
  2024-11-12  9:05       ` Patrick Steinhardt
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2024-11-12  6:41 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git

Patrick Steinhardt <ps@pks.im> writes:

> +static int backend_for(struct reftable_backend **out,
> +		       struct reftable_ref_store *store,
> +		       const char *refname,
> +		       const char **rewritten_ref,
> +		       int reload)
>  {
> +	struct reftable_backend *be;
>  	const char *wtname;
>  	int wtname_len;
>  
> -	if (!refname)
> -		return &store->main_backend;
> +	if (!refname) {
> +		be = &store->main_backend;
> +		goto out;
> +	}
>  
>  	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
>  	case REF_WORKTREE_OTHER: {
>  		static struct strbuf wtname_buf = STRBUF_INIT;
>  		struct strbuf wt_dir = STRBUF_INIT;
> -		struct reftable_backend *be;
>  
>  		/*
>  		 * We're using a static buffer here so that we don't need to
> @@ -162,7 +166,7 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
>  		}
>  
>  		strbuf_release(&wt_dir);
> -		return be;
> +		goto out;

An interesting part of this function is not shown in the above
context: we look up an existing backend from a strmap, and allocate
one if there isn't one yet.  In either case, be points at the backend
to use.  Now that be is no longer local to this block, we can access
it after jumping to the "out" label.

> +out:
> +	if (reload) {
> +		int ret = reftable_stack_reload(be->stack);
> +		if (ret)
> +			return ret;
> +	}
> +	*out = be;
> +
> +	return 0;
>  }

> @@ -828,17 +845,17 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
>  {
>  	struct reftable_ref_store *refs =
>  		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
> -	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
> +	struct reftable_backend *be;
>  	int ret;
>  
>  	if (refs->err < 0)
>  		return refs->err;
>  
> -	ret = reftable_stack_reload(stack);
> +	ret = backend_for(&be, refs, refname, &refname, 1);
>  	if (ret)
>  		return ret;

This one chooses to reload, so that the next one, i.e.
"without-reload", would not read stale information?

> -	ret = read_ref_without_reload(refs, stack, refname, oid, referent, type);
> +	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);


The following bit is curious.

> +	ret = backend_for(&be, refs, update->refname, NULL, 0);
> +	if (ret)
> +		return ret;
> +

We locate one without reloading, and ...

>  	/*
>  	 * Search for a preexisting stack update. If there is one then we add
>  	 * the update to it, otherwise we set up a new stack update.
>  	 */
>  	for (i = 0; !arg && i < tx_data->args_nr; i++)
> -		if (tx_data->args[i].stack == stack)
> +		if (tx_data->args[i].be == be)
>  			arg = &tx_data->args[i];
>  	if (!arg) {

... only when we cannot reuse preexisting one, ...

>  		struct reftable_addition *addition;
>  
> -		ret = reftable_stack_reload(stack);
> +		ret = backend_for(&be, refs, update->refname, NULL, 1);
>  		if (ret)
>  			return ret;

... instead of directly doing reload on the instance we already
have, we do another _for() to locate one, this time reload set to 1.

That looks like doing some redundant work?  I am confused.

> @@ -1048,7 +1070,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
>  		goto done;
>  	}
>  
> -	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
> +	ret = backend_for(&be, refs, "HEAD", NULL, 0);
> +	if (ret)
> +		goto done;
> +
> +	ret = read_ref_without_reload(refs, be->stack, "HEAD",
>  				      &head_oid, &head_referent, &head_type);

This now takes into account the possibility that backend_for() might
fail.  The original code would have segfaulted when it happened, I
guess.

> @@ -1057,10 +1083,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
>  	for (i = 0; i < transaction->nr; i++) {
>  		struct ref_update *u = transaction->updates[i];
>  		struct object_id current_oid = {0};
> -		struct reftable_stack *stack;
>  		const char *rewritten_ref;
>  
> -		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
> +		ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0);
> +		if (ret)
> +			goto done;

Ditto, we would have segfaulted in the next hunk when stack got NULL
here ...

> @@ -1116,7 +1143,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
>  			string_list_insert(&affected_refnames, new_update->refname);
>  		}
>  
> -		ret = read_ref_without_reload(refs, stack, rewritten_ref,
> +		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
>  					      &current_oid, &referent, &u->type);

... here.

> @@ -1831,10 +1858,9 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
>  {
>  	struct reftable_ref_store *refs =
>  		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
> -	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
> +	struct reftable_backend *be;
>  	struct write_copy_arg arg = {
>  		.refs = refs,
> -		.stack = stack,
>  		.oldname = oldrefname,
>  		.newname = newrefname,
>  		.logmsg = logmsg,
> @@ -1845,10 +1871,11 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
>  	if (ret < 0)
>  		goto done;
>  
> -	ret = reftable_stack_reload(stack);
> +	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
>  	if (ret)
>  		goto done;

We used to grab "stack" upfront and then call reload here; we now do
backend_for() and let it do the reload as well, so they should be
equivalent.

> -	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
>  	struct reftable_log_record log = {0};
>  	struct reftable_iterator it = {0};
> +	struct reftable_backend *be;
>  	int ret;
>  
>  	if (refs->err < 0)
>  		return refs->err;
>  
> -	ret = reftable_stack_init_log_iterator(stack, &it);
> +	ret = backend_for(&be, refs, refname, &refname, 0);
> +	if (ret)
> +		goto done;
> +
> +	ret = reftable_stack_init_log_iterator(be->stack, &it);

Again, other than the fact that the new code carefully prepares for
the case where backend_for() fails to find be, the versions of the
code with and without the patch are equivalent.

> @@ -2052,16 +2083,20 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
>  {
>  	struct reftable_ref_store *refs =
>  		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
> -	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
>  	struct reftable_log_record *logs = NULL;
>  	struct reftable_iterator it = {0};
> +	struct reftable_backend *be;
>  	size_t logs_alloc = 0, logs_nr = 0, i;
>  	int ret;
>  
>  	if (refs->err < 0)
>  		return refs->err;
>  
> -	ret = reftable_stack_init_log_iterator(stack, &it);
> +	ret = backend_for(&be, refs, refname, &refname, 0);
> +	if (ret)
> +		goto done;
> +
> +	ret = reftable_stack_init_log_iterator(be->stack, &it);

Ditto.

> @@ -2101,20 +2136,20 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
>  {
>  	struct reftable_ref_store *refs =
>  		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
> -	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
>  	struct reftable_log_record log = {0};
>  	struct reftable_iterator it = {0};
> +	struct reftable_backend *be;
>  	int ret;
>  
>  	ret = refs->err;
>  	if (ret < 0)
>  		goto done;
>  
> -	ret = reftable_stack_reload(stack);
> +	ret = backend_for(&be, refs, refname, &refname, 1);
>  	if (ret < 0)
>  		goto done;
>  
> -	ret = reftable_stack_init_log_iterator(stack, &it);
> +	ret = reftable_stack_init_log_iterator(be->stack, &it);
>  	if (ret < 0)
>  		goto done;

Ditto.

Overall they seem to be mostly equivalent, except that the new code
is a bit more careful against failing backend_for().  One part of
the code confused me (and still I am unsure), but other than that it
was a pleasant read.

Thanks.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend`
  2024-11-05  9:12   ` [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-12  7:26     ` Junio C Hamano
  2024-11-12  9:05       ` Patrick Steinhardt
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2024-11-12  7:26 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git

Patrick Steinhardt <ps@pks.im> writes:

> Refactor `read_ref_without_reload()` to accept a `struct reftable_stack`
> as input instead of accepting a `struct reftable_stack`. This allows us
> to implement an additional caching layer when reading refs where we can
> reuse reftable iterators.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>  refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
>  reftable/reftable-stack.h |   3 ++
>  reftable/stack.c          |   5 ++
>  3 files changed, 67 insertions(+), 51 deletions(-)
>
> diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
> index 4a28dc8a9d..230adb690d 100644
> --- a/refs/reftable-backend.c
> +++ b/refs/reftable-backend.c
> @@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
>  	be->stack = NULL;
>  }
>  
> +static int reftable_backend_read_ref(struct reftable_backend *be,
> +				     const char *refname,
> +				     struct object_id *oid,
> +				     struct strbuf *referent,
> +				     unsigned int *type)
> +{
> +	struct reftable_ref_record ref = {0};
> +	int ret;
> +
> +	ret = reftable_stack_read_ref(be->stack, refname, &ref);
> +	if (ret)
> +		goto done;
> +
> +	if (ref.value_type == REFTABLE_REF_SYMREF) {
> +		strbuf_reset(referent);
> +		strbuf_addstr(referent, ref.value.symref);
> +		*type |= REF_ISSYMREF;
> +	} else if (reftable_ref_record_val1(&ref)) {
> +		unsigned int hash_id;
> +
> +		switch (reftable_stack_hash_id(be->stack)) {

So, relative to the original, instead of relying on the repository
and its knowledge of what hash function is used, we ask the stack
what hash function is in use and use that instead.

> +		case REFTABLE_HASH_SHA1:
> +			hash_id = GIT_HASH_SHA1;
> +			break;
> +		case REFTABLE_HASH_SHA256:
> +			hash_id = GIT_HASH_SHA256;
> +			break;
> +		default:
> +			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
> +		}
> +
> +		oidread(oid, reftable_ref_record_val1(&ref),
> +			&hash_algos[hash_id]);
> +	} else {
> +		/* We got a tombstone, which should not happen. */
> +		BUG("unhandled reference value type %d", ref.value_type);
> +	}
> +
> +done:
> +	assert(ret != REFTABLE_API_ERROR);
> +	reftable_ref_record_release(&ref);
> +	return ret;
> +}

Here is the original that got replaced.  Since ...

> -static int read_ref_without_reload(struct reftable_ref_store *refs,
> -				   struct reftable_stack *stack,
> -				   const char *refname,
> -				   struct object_id *oid,
> -				   struct strbuf *referent,
> -				   unsigned int *type)
> -{
> -	struct reftable_ref_record ref = {0};
> -	int ret;
> -
> -	ret = reftable_stack_read_ref(stack, refname, &ref);
> -	if (ret)
> -		goto done;
> -
> -	if (ref.value_type == REFTABLE_REF_SYMREF) {
> -		strbuf_reset(referent);
> -		strbuf_addstr(referent, ref.value.symref);
> -		*type |= REF_ISSYMREF;
> -	} else if (reftable_ref_record_val1(&ref)) {
> -		oidread(oid, reftable_ref_record_val1(&ref),
> -			refs->base.repo->hash_algo);

... we have access to "refs", which is a ref_store that knows its
repository, so it was just a few pointer dereferences away to get the
hash id of the Git side.  But of course we use REFTABLE_HASH_*NAME*
to identify the algorithm at this layer, so we need to translate it
back to the id on the Git side before asking oidread() to read it.

> -	} else {
> -		/* We got a tombstone, which should not happen. */
> -		BUG("unhandled reference value type %d", ref.value_type);
> -	}
> -
> -done:
> -	assert(ret != REFTABLE_API_ERROR);
> -	reftable_ref_record_release(&ref);
> -	return ret;
> -}

There is one thing that is curious about this step.

It isn't like we teach the stack what hash it uses in this step---the
reftable_stack_hash_id() could have been implemented as early as
59343984 (reftable/system: stop depending on "hash.h", 2024-11-08).

Other than that this step introduces the first caller of
reftable_stack_hash_id() in the series, the remaining hunks of this
patch do not have to be part of this patch, but could have been a
separate step.  Not a suggestion to split it out, but merely an
observation (to make sure I am reading the code correctly).

> diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
> index 54787f2ef5..ae14270ea7 100644
> --- a/reftable/reftable-stack.h
> +++ b/reftable/reftable-stack.h
> @@ -149,4 +149,7 @@ struct reftable_compaction_stats {
>  struct reftable_compaction_stats *
>  reftable_stack_compaction_stats(struct reftable_stack *st);
>  
> +/* Return the hash of the stack. */
> +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
> +
>  #endif
> diff --git a/reftable/stack.c b/reftable/stack.c
> index 1fffd75630..d97b64a40d 100644
> --- a/reftable/stack.c
> +++ b/reftable/stack.c
> @@ -1791,3 +1791,8 @@ int reftable_stack_clean(struct reftable_stack *st)
>  	reftable_addition_destroy(add);
>  	return err;
>  }
> +
> +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
> +{
> +	return reftable_merged_table_hash_id(st->merged);
> +}

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-12  6:41     ` Junio C Hamano
@ 2024-11-12  9:05       ` Patrick Steinhardt
  0 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-12  9:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, Nov 12, 2024 at 03:41:48PM +0900, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> > -	ret = read_ref_without_reload(refs, stack, refname, oid, referent, type);
> > +	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
> 
> 
> The following bit is curious.
> 
> > +	ret = backend_for(&be, refs, update->refname, NULL, 0);
> > +	if (ret)
> > +		return ret;
> > +
> 
> We locate one without reloading, and ...
> 
> >  	/*
> >  	 * Search for a preexisting stack update. If there is one then we add
> >  	 * the update to it, otherwise we set up a new stack update.
> >  	 */
> >  	for (i = 0; !arg && i < tx_data->args_nr; i++)
> > -		if (tx_data->args[i].stack == stack)
> > +		if (tx_data->args[i].be == be)
> >  			arg = &tx_data->args[i];
> >  	if (!arg) {
> 
> ... only when we cannot reuse preexisting one, ...
> 
> >  		struct reftable_addition *addition;
> >  
> > -		ret = reftable_stack_reload(stack);
> > +		ret = backend_for(&be, refs, update->refname, NULL, 1);
> >  		if (ret)
> >  			return ret;
> 
> ... instead of directly doing reload on the instance we already
> have, we do another _for() to locate one, this time reload set to 1.
> 
> That looks like doing some redundant work?  I am confused.

It indeed is redundant work, yes. And in fact it is redundant work that
isn't really required anymore. My first iteration didn't yet have the
`reftable_write_options::on_reload()` callback, and I instead tried to
catch reloads via `backend_for()`, so it was required to reload via that
function.

But now that we do have the callback, that workaround isn't needed
anymore, and thus we don't have to call `backend_for()` a second time
here. I'll adapt
this accordingly.
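
Concretely, the second lookup can become a plain reload of the stack
we already have at hand, i.e. something like:

    -		ret = backend_for(&be, refs, update->refname, NULL, 1);
    +		ret = reftable_stack_reload(be->stack);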

Patrick

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend`
  2024-11-12  7:26     ` Junio C Hamano
@ 2024-11-12  9:05       ` Patrick Steinhardt
  0 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-12  9:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Tue, Nov 12, 2024 at 04:26:38PM +0900, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > Refactor `read_ref_without_reload()` to accept a `struct reftable_stack`
> > as input instead of accepting a `struct reftable_stack`. This allows us
> > to implement an additional caching layer when reading refs where we can
> > reuse reftable iterators.
> >
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> >  refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
> >  reftable/reftable-stack.h |   3 ++
> >  reftable/stack.c          |   5 ++
> >  3 files changed, 67 insertions(+), 51 deletions(-)
> >
> > diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
> > index 4a28dc8a9d..230adb690d 100644
> > --- a/refs/reftable-backend.c
> > +++ b/refs/reftable-backend.c
> > @@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
> >  	be->stack = NULL;
> >  }
> >  
> > +static int reftable_backend_read_ref(struct reftable_backend *be,
> > +				     const char *refname,
> > +				     struct object_id *oid,
> > +				     struct strbuf *referent,
> > +				     unsigned int *type)
> > +{
> > +	struct reftable_ref_record ref = {0};
> > +	int ret;
> > +
> > +	ret = reftable_stack_read_ref(be->stack, refname, &ref);
> > +	if (ret)
> > +		goto done;
> > +
> > +	if (ref.value_type == REFTABLE_REF_SYMREF) {
> > +		strbuf_reset(referent);
> > +		strbuf_addstr(referent, ref.value.symref);
> > +		*type |= REF_ISSYMREF;
> > +	} else if (reftable_ref_record_val1(&ref)) {
> > +		unsigned int hash_id;
> > +
> > +		switch (reftable_stack_hash_id(be->stack)) {
> 
> So, relative to the original, instead of relying on the repository
> and its knowledge of what hash function is used, we ask the stack
> what hash function is in use and use that instead.
> 
> > +		case REFTABLE_HASH_SHA1:
> > +			hash_id = GIT_HASH_SHA1;
> > +			break;
> > +		case REFTABLE_HASH_SHA256:
> > +			hash_id = GIT_HASH_SHA256;
> > +			break;
> > +		default:
> > +			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
> > +		}
> > +
> > +		oidread(oid, reftable_ref_record_val1(&ref),
> > +			&hash_algos[hash_id]);
> > +	} else {
> > +		/* We got a tombstone, which should not happen. */
> > +		BUG("unhandled reference value type %d", ref.value_type);
> > +	}
> > +
> > +done:
> > +	assert(ret != REFTABLE_API_ERROR);
> > +	reftable_ref_record_release(&ref);
> > +	return ret;
> > +}
> 
> Here is the original that got replaced.  Since ...
> 
> > -static int read_ref_without_reload(struct reftable_ref_store *refs,
> > -				   struct reftable_stack *stack,
> > -				   const char *refname,
> > -				   struct object_id *oid,
> > -				   struct strbuf *referent,
> > -				   unsigned int *type)
> > -{
> > -	struct reftable_ref_record ref = {0};
> > -	int ret;
> > -
> > -	ret = reftable_stack_read_ref(stack, refname, &ref);
> > -	if (ret)
> > -		goto done;
> > -
> > -	if (ref.value_type == REFTABLE_REF_SYMREF) {
> > -		strbuf_reset(referent);
> > -		strbuf_addstr(referent, ref.value.symref);
> > -		*type |= REF_ISSYMREF;
> > -	} else if (reftable_ref_record_val1(&ref)) {
> > -		oidread(oid, reftable_ref_record_val1(&ref),
> > -			refs->base.repo->hash_algo);
> 
> ... we have access to "refs", which is a ref_store that knows its
> repository, so it was just a few pointer dereferences away to get the
> hash id of the Git side.  But of course we use REFTABLE_HASH_*NAME*
> to identify the algorithm at this layer, so we need to translate it
> back to the id on the Git side before asking oidread() to read it.
> 
> > -	} else {
> > -		/* We got a tombstone, which should not happen. */
> > -		BUG("unhandled reference value type %d", ref.value_type);
> > -	}
> > -
> > -done:
> > -	assert(ret != REFTABLE_API_ERROR);
> > -	reftable_ref_record_release(&ref);
> > -	return ret;
> > -}
> 
> There is one thing that is curious about this step.
> 
> It isn't like we teach the stack what hash it uses in this step---the
> reftable_stack_hash_id() could have been implemented as early as
> 59343984 (reftable/system: stop depending on "hash.h", 2024-11-08).

In theory we could've implemented it even earlier than that: the commit
only introduces the reftable-specific hashes, and we had the
Git-specific hashes available before that. Like that we wouldn't even
have to translate between the different hashes in the first place.

> Other than that this step introduces the first caller of
> reftable_stack_hash_id() in the series, the remaining hunks of this
> patch do not have to be part of this patch, but could have been a
> separate step.  Not a suggestion to split it out, but merely an
> observation (to make sure I am reading the code correctly).

Yup, your understanding matches mine.

Patrick

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 0/9] refs/reftable: reuse iterators when reading refs
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (9 preceding siblings ...)
  2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
@ 2024-11-25  7:38 ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 1/9] refs/reftable: encapsulate reftable stack Patrick Steinhardt
                     ` (9 more replies)
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
  11 siblings, 10 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Hi,

this is the third version of my patch series that refactors the
reftable backend to reuse iterators when reading random references. This
removes the overhead of having to recreate the iterator on every read
and thus leads to better performance and less allocation churn.

Changes in v3:

  - Adapt some comments to refer to the "backend" instead of to the
    "stack".
  - Fix indentation of a statement while at it.
  - Explain why callsites don't want to reload the stack.
  - Optimize `prepare_transaction_update()` by not using `backend_for()`
    twice, but instead reload the stack manually.
  - Split out the change that adds `reftable_stack_hash_id()` into a
    separate commit.
  - Link to v2: https://lore.kernel.org/r/cover.1730792627.git.ps@pks.im

Thanks!

Patrick

---
Patrick Steinhardt (9):
      refs/reftable: encapsulate reftable stack
      refs/reftable: handle reloading stacks in the reftable backend
      reftable/stack: add accessor for the hash ID
      refs/reftable: read references via `struct reftable_backend`
      refs/reftable: refactor reading symbolic refs to use reftable backend
      refs/reftable: refactor reflog expiry to use reftable backend
      reftable/stack: add mechanism to notify callers on reload
      reftable/merged: drain priority queue on reseek
      refs/reftable: reuse iterators when reading refs

 refs/reftable-backend.c          | 409 +++++++++++++++++++++++++--------------
 reftable/merged.c                |   2 +
 reftable/reftable-stack.h        |   3 +
 reftable/reftable-writer.h       |   9 +
 reftable/stack.c                 |   9 +
 t/unit-tests/t-reftable-merged.c |  73 +++++++
 6 files changed, 357 insertions(+), 148 deletions(-)

Range-diff versus v2:

 1:  9854214fe9 !  1:  21071ae5a5 refs/reftable: encapsulate reftable stack
    @@ refs/reftable-backend.c
      struct reftable_ref_store {
      	struct ref_store base;
      
    -@@ refs/reftable-backend.c: struct reftable_ref_store {
    - 	 * The main stack refers to the common dir and thus contains common
    + 	/*
    +-	 * The main stack refers to the common dir and thus contains common
    ++	 * The main backend refers to the common dir and thus contains common
      	 * refs as well as refs of the main repository.
      	 */
     -	struct reftable_stack *main_stack;
     +	struct reftable_backend main_backend;
      	/*
    - 	 * The worktree stack refers to the gitdir in case the refdb is opened
    +-	 * The worktree stack refers to the gitdir in case the refdb is opened
    ++	 * The worktree backend refers to the gitdir in case the refdb is opened
      	 * via a worktree. It thus contains the per-worktree refs.
      	 */
     -	struct reftable_stack *worktree_stack;
     +	struct reftable_backend worktree_backend;
      	/*
    - 	 * Map of worktree stacks by their respective worktree names. The map
    +-	 * Map of worktree stacks by their respective worktree names. The map
    ++	 * Map of worktree backends by their respective worktree names. The map
      	 * is populated lazily when we try to resolve `worktrees/$worktree` refs.
      	 */
     -	struct strmap worktree_stacks;
    @@ refs/reftable-backend.c: static struct ref_iterator *reftable_be_iterator_begin(
      	 * iterator, only.
      	 */
     -	 if (!refs->worktree_stack)
    -+	 if (!refs->worktree_backend.stack)
    ++	if (!refs->worktree_backend.stack)
      		return &main_iter->base;
      
      	/*
 2:  18265dfafc !  2:  a9588125c7 refs/reftable: handle reloading stacks in the reftable backend
    @@ Commit message
         But second this makes the logic to access stacks more self-contained by
          letting each `struct reftable_backend` manage itself.
     
    +    Update callsites where we don't reload the stack to document why we
    +    don't. In some cases it's unclear whether it is the right thing to do in
    +    the first place, but fixing that is outside of the scope of this patch
    +    series.
    +
         Signed-off-by: Patrick Steinhardt <ps@pks.im>
     
      ## refs/reftable-backend.c ##
    @@ refs/reftable-backend.c: static int prepare_transaction_update(struct write_tran
      	size_t i;
      	int ret;
      
    ++	/*
    ++	 * This function gets called in a loop, and we don't want to repeatedly
    ++	 * reload the stack for every single ref update. Instead, we manually
    ++	 * reload further down in the case where we haven't yet prepared the
    ++	 * specific `reftable_backend`.
    ++	 */
     +	ret = backend_for(&be, refs, update->refname, NULL, 0);
     +	if (ret)
     +		return ret;
    @@ refs/reftable-backend.c: static int prepare_transaction_update(struct write_tran
      		struct reftable_addition *addition;
      
     -		ret = reftable_stack_reload(stack);
    -+		ret = backend_for(&be, refs, update->refname, NULL, 1);
    ++		ret = reftable_stack_reload(be->stack);
      		if (ret)
      			return ret;
      
    @@ refs/reftable-backend.c: static int reftable_be_transaction_prepare(struct ref_s
      	}
      
     -	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
    ++	/*
    ++	 * TODO: it's dubious whether we should reload the stack that "HEAD"
    ++	 * belongs to or not. In theory, it may happen that we only modify
    ++	 * stacks which are _not_ part of the "HEAD" stack. In that case we
    ++	 * wouldn't have prepared any transaction for its stack and would not
    ++	 * have reloaded it, which may mean that it is stale.
    ++	 *
    ++	 * On the other hand, reloading that stack without locking it feels
     ++	 * wrong too, as the value of "HEAD" could be modified concurrently at
    ++	 * any point in time.
    ++	 */
     +	ret = backend_for(&be, refs, "HEAD", NULL, 0);
     +	if (ret)
     +		goto done;
    @@ refs/reftable-backend.c: static int reftable_be_transaction_prepare(struct ref_s
      		const char *rewritten_ref;
      
     -		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
    ++		/*
    ++		 * There is no need to reload the respective backends here as
    ++		 * we have already reloaded them when preparing the transaction
    ++		 * update. And given that the stacks have been locked there
    ++		 * shouldn't have been any concurrent modifications of the
    ++		 * stack.
    ++		 */
     +		ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0);
     +		if (ret)
     +			goto done;
    @@ refs/reftable-backend.c: static int reftable_be_for_each_reflog_ent_reverse(stru
      		return refs->err;
      
     -	ret = reftable_stack_init_log_iterator(stack, &it);
    ++	/*
    ++	 * TODO: we should adapt this callsite to reload the stack. There is no
    ++	 * obvious reason why we shouldn't.
    ++	 */
     +	ret = backend_for(&be, refs, refname, &refname, 0);
     +	if (ret)
     +		goto done;
    @@ refs/reftable-backend.c: static int reftable_be_for_each_reflog_ent(struct ref_s
      		return refs->err;
      
     -	ret = reftable_stack_init_log_iterator(stack, &it);
    ++	/*
    ++	 * TODO: we should adapt this callsite to reload the stack. There is no
    ++	 * obvious reason why we shouldn't.
    ++	 */
     +	ret = backend_for(&be, refs, refname, &refname, 0);
     +	if (ret)
     +		goto done;
    @@ refs/reftable-backend.c: static int reftable_be_reflog_expire(struct ref_store *
      	arg.records = rewritten;
      	arg.len = logs_nr;
     -	arg.stack = stack,
    -+	arg.stack = be->stack,
    - 	arg.refname = refname,
    +-	arg.refname = refname,
    ++	arg.stack = be->stack;
    ++	arg.refname = refname;
      
      	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);
    + 	if (ret < 0)
 -:  ---------- >  3:  76f7ff40d2 reftable/stack: add accessor for the hash ID
 3:  c33093e73a !  4:  19da5f587c refs/reftable: read references via `struct reftable_backend`
    @@ refs/reftable-backend.c: static int reftable_be_copy_ref(struct ref_store *ref_s
      
      done:
      	assert(ret != REFTABLE_API_ERROR);
    -
    - ## reftable/reftable-stack.h ##
    -@@ reftable/reftable-stack.h: struct reftable_compaction_stats {
    - struct reftable_compaction_stats *
    - reftable_stack_compaction_stats(struct reftable_stack *st);
    - 
    -+/* Return the hash of the stack. */
    -+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
    -+
    - #endif
    -
    - ## reftable/stack.c ##
    -@@ reftable/stack.c: int reftable_stack_clean(struct reftable_stack *st)
    - 	reftable_addition_destroy(add);
    - 	return err;
    - }
    -+
    -+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
    -+{
    -+	return reftable_merged_table_hash_id(st->merged);
    -+}
 4:  8489e32d87 =  5:  ff4f02dda7 refs/reftable: refactor reading symbolic refs to use reftable backend
 5:  b1afd63785 =  6:  ed8963a520 refs/reftable: refactor reflog expiry to use reftable backend
 6:  1e754ccde8 =  7:  5b91ca48e3 reftable/stack: add mechanism to notify callers on reload
 7:  6755cf9ec9 =  8:  613f794fe6 reftable/merged: drain priority queue on reseek
 8:  e3b29b2035 =  9:  a44911e4a4 refs/reftable: reuse iterators when reading refs

---
base-commit: 455ddbf8c6a694968c1089fb6c7ffb1d31d97e9d
change-id: 20241125-pks-reftable-backend-reuse-iter-3a2e92428789


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v3 1/9] refs/reftable: encapsulate reftable stack
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 2/9] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

The reftable ref store needs to keep track of multiple stacks, one for
the main worktree and an arbitrary number of stacks for worktrees. This
is done by storing pointers to `struct reftable_stack`, which we then
access directly.

Wrap the stack in a new `struct reftable_backend`. This will allow us to
attach more data to each respective stack in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 135 +++++++++++++++++++++++++++---------------------
 1 file changed, 76 insertions(+), 59 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index f560bc2b67857d785294e6b5699383a256f30813..acd26f8928d18396f78a2d39ad0e0c1796d5a409 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -34,24 +34,41 @@
  */
 #define REF_UPDATE_VIA_HEAD (1 << 8)
 
+struct reftable_backend {
+	struct reftable_stack *stack;
+};
+
+static int reftable_backend_init(struct reftable_backend *be,
+				 const char *path,
+				 const struct reftable_write_options *opts)
+{
+	return reftable_new_stack(&be->stack, path, opts);
+}
+
+static void reftable_backend_release(struct reftable_backend *be)
+{
+	reftable_stack_destroy(be->stack);
+	be->stack = NULL;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
 	/*
-	 * The main stack refers to the common dir and thus contains common
+	 * The main backend refers to the common dir and thus contains common
 	 * refs as well as refs of the main repository.
 	 */
-	struct reftable_stack *main_stack;
+	struct reftable_backend main_backend;
 	/*
-	 * The worktree stack refers to the gitdir in case the refdb is opened
+	 * The worktree backend refers to the gitdir in case the refdb is opened
 	 * via a worktree. It thus contains the per-worktree refs.
 	 */
-	struct reftable_stack *worktree_stack;
+	struct reftable_backend worktree_backend;
 	/*
-	 * Map of worktree stacks by their respective worktree names. The map
+	 * Map of worktree backends by their respective worktree names. The map
 	 * is populated lazily when we try to resolve `worktrees/$worktree` refs.
 	 */
-	struct strmap worktree_stacks;
+	struct strmap worktree_backends;
 	struct reftable_write_options write_options;
 
 	unsigned int store_flags;
@@ -97,21 +114,21 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_stack *stack_for(struct reftable_ref_store *store,
-					const char *refname,
-					const char **rewritten_ref)
+static struct reftable_backend *backend_for(struct reftable_ref_store *store,
+					    const char *refname,
+					    const char **rewritten_ref)
 {
 	const char *wtname;
 	int wtname_len;
 
 	if (!refname)
-		return store->main_stack;
+		return &store->main_backend;
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_stack *stack;
+		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -125,37 +142,39 @@ static struct reftable_stack *stack_for(struct reftable_ref_store *store,
 		/*
 		 * There is an edge case here: when the worktree references the
 		 * current worktree, then we set up the stack once via
-		 * `worktree_stacks` and once via `worktree_stack`. This is
+		 * `worktree_backends` and once via `worktree_backend`. This is
 		 * wasteful, but in the reading case it shouldn't matter. And
 		 * in the writing case we would notice that the stack is locked
 		 * already and error out when trying to write a reference via
 		 * both stacks.
 		 */
-		stack = strmap_get(&store->worktree_stacks, wtname_buf.buf);
-		if (!stack) {
+		be = strmap_get(&store->worktree_backends, wtname_buf.buf);
+		if (!be) {
 			strbuf_addf(&wt_dir, "%s/worktrees/%s/reftable",
 				    store->base.repo->commondir, wtname_buf.buf);
 
-			store->err = reftable_new_stack(&stack, wt_dir.buf,
-							&store->write_options);
+			CALLOC_ARRAY(be, 1);
+			store->err = reftable_backend_init(be, wt_dir.buf,
+							   &store->write_options);
 			assert(store->err != REFTABLE_API_ERROR);
-			strmap_put(&store->worktree_stacks, wtname_buf.buf, stack);
+
+			strmap_put(&store->worktree_backends, wtname_buf.buf, be);
 		}
 
 		strbuf_release(&wt_dir);
-		return stack;
+		return be;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
 		 * If there is no worktree stack then we're currently in the
 		 * main worktree. We thus return the main stack in that case.
 		 */
-		if (!store->worktree_stack)
-			return store->main_stack;
-		return store->worktree_stack;
+		if (!store->worktree_backend.stack)
+			return &store->main_backend;
+		return &store->worktree_backend;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return store->main_stack;
+		return &store->main_backend;
 	default:
 		BUG("unhandled worktree reference type");
 	}
@@ -292,7 +311,7 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 	umask(mask);
 
 	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
-	strmap_init(&refs->worktree_stacks);
+	strmap_init(&refs->worktree_backends);
 	refs->store_flags = store_flags;
 	refs->log_all_ref_updates = repo_settings_get_log_all_ref_updates(repo);
 
@@ -337,8 +356,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_realpath(&path, gitdir, 0);
 	}
 	strbuf_addstr(&path, "/reftable");
-	refs->err = reftable_new_stack(&refs->main_stack, path.buf,
-				       &refs->write_options);
+	refs->err = reftable_backend_init(&refs->main_backend, path.buf,
+					  &refs->write_options);
 	if (refs->err)
 		goto done;
 
@@ -354,8 +373,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_reset(&path);
 		strbuf_addf(&path, "%s/reftable", gitdir);
 
-		refs->err = reftable_new_stack(&refs->worktree_stack, path.buf,
-					       &refs->write_options);
+		refs->err = reftable_backend_init(&refs->worktree_backend, path.buf,
+						  &refs->write_options);
 		if (refs->err)
 			goto done;
 	}
@@ -374,19 +393,17 @@ static void reftable_be_release(struct ref_store *ref_store)
 	struct strmap_entry *entry;
 	struct hashmap_iter iter;
 
-	if (refs->main_stack) {
-		reftable_stack_destroy(refs->main_stack);
-		refs->main_stack = NULL;
-	}
+	if (refs->main_backend.stack)
+		reftable_backend_release(&refs->main_backend);
+	if (refs->worktree_backend.stack)
+		reftable_backend_release(&refs->worktree_backend);
 
-	if (refs->worktree_stack) {
-		reftable_stack_destroy(refs->worktree_stack);
-		refs->worktree_stack = NULL;
+	strmap_for_each_entry(&refs->worktree_backends, &iter, entry) {
+		struct reftable_backend *be = entry->value;
+		reftable_backend_release(be);
+		free(be);
 	}
-
-	strmap_for_each_entry(&refs->worktree_stacks, &iter, entry)
-		reftable_stack_destroy(entry->value);
-	strmap_clear(&refs->worktree_stacks, 0);
+	strmap_clear(&refs->worktree_backends, 0);
 }
 
 static int reftable_be_create_on_disk(struct ref_store *ref_store,
@@ -781,7 +798,7 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 		required_flags |= REF_STORE_ODB;
 	refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin");
 
-	main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix,
+	main_iter = ref_iterator_for_stack(refs, refs->main_backend.stack, prefix,
 					   exclude_patterns, flags);
 
 	/*
@@ -789,14 +806,14 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 	 * right now. If we aren't, then we return the common reftable
 	 * iterator, only.
 	 */
-	 if (!refs->worktree_stack)
+	if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
 	/*
 	 * Otherwise we merge both the common and the per-worktree refs into a
 	 * single iterator.
 	 */
-	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix,
+	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_backend.stack, prefix,
 					       exclude_patterns, flags);
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -811,7 +828,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	int ret;
 
 	if (refs->err < 0)
@@ -838,7 +855,7 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
 	int ret;
 
@@ -898,7 +915,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = stack_for(refs, update->refname, NULL);
+	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
 	size_t i;
 	int ret;
@@ -1031,7 +1048,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, stack_for(refs, "HEAD", NULL), "HEAD",
+	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1043,7 +1060,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = stack_for(refs, u->refname, &rewritten_ref);
+		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1525,9 +1542,9 @@ static int reftable_be_pack_refs(struct ref_store *ref_store,
 	if (refs->err)
 		return refs->err;
 
-	stack = refs->worktree_stack;
+	stack = refs->worktree_backend.stack;
 	if (!stack)
-		stack = refs->main_stack;
+		stack = refs->main_backend.stack;
 
 	if (opts->flags & PACK_REFS_AUTO)
 		ret = reftable_stack_auto_compact(stack);
@@ -1782,7 +1799,7 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1814,7 +1831,7 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1952,11 +1969,11 @@ static struct ref_iterator *reftable_be_reflog_iterator_begin(struct ref_store *
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_iterator_begin");
 	struct reftable_reflog_iterator *main_iter, *worktree_iter;
 
-	main_iter = reflog_iterator_for_stack(refs, refs->main_stack);
-	if (!refs->worktree_stack)
+	main_iter = reflog_iterator_for_stack(refs, refs->main_backend.stack);
+	if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
-	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_stack);
+	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_backend.stack);
 
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -1995,7 +2012,7 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2035,7 +2052,7 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
 	size_t logs_alloc = 0, logs_nr = 0, i;
@@ -2084,7 +2101,7 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2169,7 +2186,7 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -2243,7 +2260,7 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_delete_arg arg = {
 		.stack = stack,
 		.refname = refname,
@@ -2352,7 +2369,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 2/9] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 1/9] refs/reftable: encapsulate reftable stack Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-26  0:31     ` Junio C Hamano
  2024-11-25  7:38   ` [PATCH v3 3/9] reftable/stack: add accessor for the hash ID Patrick Steinhardt
                     ` (7 subsequent siblings)
  9 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

When accessing a stack we almost always have to reload the stack before
reading data from it. This is mostly because Git does not have a
notification mechanism for when underlying data has been changed, and
thus we are forced to opportunistically reload the stack every single
time to account for any changes that may have happened concurrently.

Handle the reload internally in `backend_for()`. For one this forces
callsites to think about whether or not they need to reload the stack.
But second this makes the logic to access stacks more self-contained by
letting each `struct reftable_backend` manage itself.
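
To illustrate the new calling convention, here is a minimal sketch (the
wrapper `example_read` is hypothetical; `backend_for()` and
`read_ref_without_reload()` are the functions touched by the diff
below):

    /* Hedged sketch: an opportunistic read that refreshes the stack. */
    static int example_read(struct reftable_ref_store *refs,
                            const char *refname, struct object_id *oid,
                            struct strbuf *referent, unsigned int *type)
    {
            struct reftable_backend *be;
            int ret;

            /* reload=1, since we may race with concurrent writers. */
            ret = backend_for(&be, refs, refname, &refname, 1);
            if (ret)
                    return ret;

            return read_ref_without_reload(refs, be->stack, refname,
                                           oid, referent, type);
    }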

Update callsites where we don't reload the stack to document why we
don't. In some cases it's unclear whether it is the right thing to do in
the first place, but fixing that is outside of the scope of this patch
series.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 184 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 126 insertions(+), 58 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index acd26f8928d18396f78a2d39ad0e0c1796d5a409..64fe8fd02d8ec932f7980cdb7d7d5223c3c83b73 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -114,21 +114,25 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_backend *backend_for(struct reftable_ref_store *store,
-					    const char *refname,
-					    const char **rewritten_ref)
+static int backend_for(struct reftable_backend **out,
+		       struct reftable_ref_store *store,
+		       const char *refname,
+		       const char **rewritten_ref,
+		       int reload)
 {
+	struct reftable_backend *be;
 	const char *wtname;
 	int wtname_len;
 
-	if (!refname)
-		return &store->main_backend;
+	if (!refname) {
+		be = &store->main_backend;
+		goto out;
+	}
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -162,7 +166,7 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		}
 
 		strbuf_release(&wt_dir);
-		return be;
+		goto out;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
@@ -170,14 +174,27 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		 * main worktree. We thus return the main stack in that case.
 		 */
 		if (!store->worktree_backend.stack)
-			return &store->main_backend;
-		return &store->worktree_backend;
+			be = &store->main_backend;
+		else
+			be = &store->worktree_backend;
+		goto out;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return &store->main_backend;
+		be = &store->main_backend;
+		goto out;
 	default:
 		BUG("unhandled worktree reference type");
 	}
+
+out:
+	if (reload) {
+		int ret = reftable_stack_reload(be->stack);
+		if (ret)
+			return ret;
+	}
+	*out = be;
+
+	return 0;
 }
 
 static int should_write_log(struct reftable_ref_store *refs, const char *refname)
@@ -828,17 +845,17 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, stack, refname, oid, referent, type);
+	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -855,15 +872,15 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
+	struct reftable_backend *be;
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
 	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
 		strbuf_addstr(referent, ref.value.symref);
 	else
@@ -880,7 +897,7 @@ struct reftable_transaction_update {
 
 struct write_transaction_table_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	struct reftable_addition *addition;
 	struct reftable_transaction_update *updates;
 	size_t updates_nr;
@@ -915,27 +932,37 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
+	struct reftable_backend *be;
 	size_t i;
 	int ret;
 
+	/*
+	 * This function gets called in a loop, and we don't want to repeatedly
+	 * reload the stack for every single ref update. Instead, we manually
+	 * reload further down in the case where we haven't yet prepared the
+	 * specific `reftable_backend`.
+	 */
+	ret = backend_for(&be, refs, update->refname, NULL, 0);
+	if (ret)
+		return ret;
+
 	/*
 	 * Search for a preexisting stack update. If there is one then we add
 	 * the update to it, otherwise we set up a new stack update.
 	 */
 	for (i = 0; !arg && i < tx_data->args_nr; i++)
-		if (tx_data->args[i].stack == stack)
+		if (tx_data->args[i].be == be)
 			arg = &tx_data->args[i];
 
 	if (!arg) {
 		struct reftable_addition *addition;
 
-		ret = reftable_stack_reload(stack);
+		ret = reftable_stack_reload(be->stack);
 		if (ret)
 			return ret;
 
-		ret = reftable_stack_new_addition(&addition, stack,
+		ret = reftable_stack_new_addition(&addition, be->stack,
 						  REFTABLE_STACK_NEW_ADDITION_RELOAD);
 		if (ret) {
 			if (ret == REFTABLE_LOCK_ERROR)
@@ -947,7 +974,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 			   tx_data->args_alloc);
 		arg = &tx_data->args[tx_data->args_nr++];
 		arg->refs = refs;
-		arg->stack = stack;
+		arg->be = be;
 		arg->addition = addition;
 		arg->updates = NULL;
 		arg->updates_nr = 0;
@@ -1002,6 +1029,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	struct strbuf referent = STRBUF_INIT, head_referent = STRBUF_INIT;
 	struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
 	struct reftable_transaction_data *tx_data = NULL;
+	struct reftable_backend *be;
 	struct object_id head_oid;
 	unsigned int head_type = 0;
 	size_t i;
@@ -1048,7 +1076,22 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
+	/*
+	 * TODO: it's dubious whether we should reload the stack that "HEAD"
+	 * belongs to or not. In theory, it may happen that we only modify
+	 * stacks which are _not_ part of the "HEAD" stack. In that case we
+	 * wouldn't have prepared any transaction for its stack and would not
+	 * have reloaded it, which may mean that it is stale.
+	 *
+	 * On the other hand, reloading that stack without locking it feels
+	 * wrong too, as the value of "HEAD" could be modified concurrently at
+	 * any point in time.
+	 */
+	ret = backend_for(&be, refs, "HEAD", NULL, 0);
+	if (ret)
+		goto done;
+
+	ret = read_ref_without_reload(refs, be->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1057,10 +1100,18 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	for (i = 0; i < transaction->nr; i++) {
 		struct ref_update *u = transaction->updates[i];
 		struct object_id current_oid = {0};
-		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
+		/*
+		 * There is no need to reload the respective backends here as
+		 * we have already reloaded them when preparing the transaction
+		 * update. And given that the stacks have been locked there
+		 * shouldn't have been any concurrent modifications of the
+		 * stack.
+		 */
+		ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0);
+		if (ret)
+			goto done;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1116,7 +1167,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, stack, rewritten_ref,
+		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
 					      &current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
@@ -1318,7 +1369,7 @@ static int transaction_update_cmp(const void *a, const void *b)
 static int write_transaction_table(struct reftable_writer *writer, void *cb_data)
 {
 	struct write_transaction_table_arg *arg = cb_data;
-	uint64_t ts = reftable_stack_next_update_index(arg->stack);
+	uint64_t ts = reftable_stack_next_update_index(arg->be->stack);
 	struct reftable_log_record *logs = NULL;
 	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -1354,7 +1405,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			struct reftable_log_record log = {0};
 			struct reftable_iterator it = {0};
 
-			ret = reftable_stack_init_log_iterator(arg->stack, &it);
+			ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 			if (ret < 0)
 				goto done;
 
@@ -1799,10 +1850,9 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1814,10 +1864,11 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1831,10 +1882,9 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1845,10 +1895,11 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -2012,15 +2063,23 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	/*
+	 * TODO: we should adapt this callsite to reload the stack. There is no
+	 * obvious reason why we shouldn't.
+	 */
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2052,16 +2111,24 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	size_t logs_alloc = 0, logs_nr = 0, i;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	/*
+	 * TODO: we should adapt this callsite to reload the stack. There is no
+	 * obvious reason why we shouldn't.
+	 */
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2101,20 +2168,20 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	ret = refs->err;
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2186,10 +2253,9 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
@@ -2198,11 +2264,12 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		goto done;
+	arg.stack = be->stack;
 
-	ret = reftable_stack_add(stack, &write_reflog_existence_table, &arg);
+	ret = reftable_stack_add(be->stack, &write_reflog_existence_table, &arg);
 
 done:
 	return ret;
@@ -2260,17 +2327,18 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_delete_arg arg = {
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
-	ret = reftable_stack_add(stack, &write_reflog_delete_table, &arg);
+	arg.stack = be->stack;
+
+	ret = reftable_stack_add(be->stack, &write_reflog_delete_table, &arg);
 
 	assert(ret != REFTABLE_API_ERROR);
 	return ret;
@@ -2369,13 +2437,13 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
+	struct reftable_backend *be;
 	struct object_id oid = {0};
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -2384,11 +2452,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2396,11 +2464,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_new_addition(&add, stack, 0);
+	ret = reftable_stack_new_addition(&add, be->stack, 0);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref_record);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
 	if (ret < 0)
 		goto done;
 	if (reftable_ref_record_val1(&ref_record))
@@ -2479,8 +2547,8 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	arg.refs = refs;
 	arg.records = rewritten;
 	arg.len = logs_nr;
-	arg.stack = stack,
-	arg.refname = refname,
+	arg.stack = be->stack;
+	arg.refname = refname;
 
 	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);
 	if (ret < 0)

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 3/9] reftable/stack: add accessor for the hash ID
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 1/9] refs/reftable: encapsulate reftable stack Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 2/9] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
                     ` (6 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Add an accessor function that allows callers to access the hash ID of a
reftable stack. This function will be used in a subsequent commit.
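
As a rough sketch of the intended use (the helper `stack_hash_algo` is
hypothetical; the next patch open-codes an equivalent mapping):

    static const struct git_hash_algo *stack_hash_algo(struct reftable_stack *st)
    {
            /* Translate the reftable hash ID to Git's own hash algorithm. */
            switch (reftable_stack_hash_id(st)) {
            case REFTABLE_HASH_SHA1:
                    return &hash_algos[GIT_HASH_SHA1];
            case REFTABLE_HASH_SHA256:
                    return &hash_algos[GIT_HASH_SHA256];
            default:
                    BUG("unhandled hash ID %d",
                        reftable_stack_hash_id(st));
            }
    }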

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reftable-stack.h | 3 +++
 reftable/stack.c          | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
index 54787f2ef530406a7970db058c3a0cf456897978..ae14270ea74108cd4c314ec38e7d5c9a4e731481 100644
--- a/reftable/reftable-stack.h
+++ b/reftable/reftable-stack.h
@@ -149,4 +149,7 @@ struct reftable_compaction_stats {
 struct reftable_compaction_stats *
 reftable_stack_compaction_stats(struct reftable_stack *st);
 
+/* Return the hash of the stack. */
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
+
 #endif
diff --git a/reftable/stack.c b/reftable/stack.c
index 1fffd75630266c0d3d1e4a2d037b5cf90455529d..d97b64a40d4ad05cfd9e6f33e8ba1e713281ef6d 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1791,3 +1791,8 @@ int reftable_stack_clean(struct reftable_stack *st)
 	reftable_addition_destroy(add);
 	return err;
 }
+
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
+{
+	return reftable_merged_table_hash_id(st->merged);
+}

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend`
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (2 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 3/9] reftable/stack: add accessor for the hash ID Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-26  0:48     ` Junio C Hamano
  2024-11-25  7:38   ` [PATCH v3 5/9] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
                     ` (5 subsequent siblings)
  9 siblings, 1 reply; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Refactor `read_ref_without_reload()` to accept a `struct reftable_backend`
as input instead of a `struct reftable_stack`. This allows us to
implement an additional caching layer when reading refs where we can
reuse reftable iterators.
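
A caller-side sketch of the resulting helper (`reftable_backend_read_ref()`
is introduced in the diff below, `backend_for()` comes from the earlier
patch; the surrounding variables are illustrative and error handling is
abbreviated):

    struct reftable_backend *be;
    struct object_id oid;
    struct strbuf referent = STRBUF_INIT;
    unsigned int type = 0;
    int ret;

    ret = backend_for(&be, refs, refname, &refname, 1);
    if (!ret)
            ret = reftable_backend_read_ref(be, refname, &oid,
                                            &referent, &type);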

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 110 ++++++++++++++++++++++++++----------------------
 1 file changed, 59 insertions(+), 51 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 64fe8fd02d8ec932f7980cdb7d7d5223c3c83b73..5933c561f5c422d12b616514ed76b75c52a13477 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
 	be->stack = NULL;
 }
 
+static int reftable_backend_read_ref(struct reftable_backend *be,
+				     const char *refname,
+				     struct object_id *oid,
+				     struct strbuf *referent,
+				     unsigned int *type)
+{
+	struct reftable_ref_record ref = {0};
+	int ret;
+
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (ret)
+		goto done;
+
+	if (ref.value_type == REFTABLE_REF_SYMREF) {
+		strbuf_reset(referent);
+		strbuf_addstr(referent, ref.value.symref);
+		*type |= REF_ISSYMREF;
+	} else if (reftable_ref_record_val1(&ref)) {
+		unsigned int hash_id;
+
+		switch (reftable_stack_hash_id(be->stack)) {
+		case REFTABLE_HASH_SHA1:
+			hash_id = GIT_HASH_SHA1;
+			break;
+		case REFTABLE_HASH_SHA256:
+			hash_id = GIT_HASH_SHA256;
+			break;
+		default:
+			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
+		}
+
+		oidread(oid, reftable_ref_record_val1(&ref),
+			&hash_algos[hash_id]);
+	} else {
+		/* We got a tombstone, which should not happen. */
+		BUG("unhandled reference value type %d", ref.value_type);
+	}
+
+done:
+	assert(ret != REFTABLE_API_ERROR);
+	reftable_ref_record_release(&ref);
+	return ret;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -243,38 +287,6 @@ static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_ref_store *refs,
-				   struct reftable_stack *stack,
-				   const char *refname,
-				   struct object_id *oid,
-				   struct strbuf *referent,
-				   unsigned int *type)
-{
-	struct reftable_ref_record ref = {0};
-	int ret;
-
-	ret = reftable_stack_read_ref(stack, refname, &ref);
-	if (ret)
-		goto done;
-
-	if (ref.value_type == REFTABLE_REF_SYMREF) {
-		strbuf_reset(referent);
-		strbuf_addstr(referent, ref.value.symref);
-		*type |= REF_ISSYMREF;
-	} else if (reftable_ref_record_val1(&ref)) {
-		oidread(oid, reftable_ref_record_val1(&ref),
-			refs->base.repo->hash_algo);
-	} else {
-		/* We got a tombstone, which should not happen. */
-		BUG("unhandled reference value type %d", ref.value_type);
-	}
-
-done:
-	assert(ret != REFTABLE_API_ERROR);
-	reftable_ref_record_release(&ref);
-	return ret;
-}
-
 static int reftable_be_config(const char *var, const char *value,
 			      const struct config_context *ctx,
 			      void *_opts)
@@ -855,7 +867,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
+	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1091,8 +1103,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(refs, be->stack, "HEAD",
-				      &head_oid, &head_referent, &head_type);
+	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
+					&head_referent, &head_type);
 	if (ret < 0)
 		goto done;
 	ret = 0;
@@ -1167,8 +1179,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
-					      &current_oid, &referent, &u->type);
+		ret = reftable_backend_read_ref(be, rewritten_ref,
+						&current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
 		if (ret > 0 && !ref_update_expects_existing_old_ref(u)) {
@@ -1626,7 +1638,7 @@ struct write_create_symref_arg {
 
 struct write_copy_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	const char *oldname;
 	const char *newname;
 	const char *logmsg;
@@ -1651,7 +1663,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
 		BUG("failed splitting committer info");
 
-	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
+	if (reftable_stack_read_ref(arg->be->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
 	}
@@ -1690,7 +1702,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * the old branch and the creation of the new branch, and we cannot do
 	 * two changes to a reflog in a single update.
 	 */
-	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack);
+	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->be->stack);
 	if (arg->delete_old)
 		creation_ts++;
 	reftable_writer_set_limits(writer, deletion_ts, creation_ts);
@@ -1733,8 +1745,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
-		ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid,
-					      &head_referent, &head_type);
+		ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid,
+						&head_referent, &head_type);
 		if (ret < 0)
 			goto done;
 		append_head_reflog = (head_type & REF_ISSYMREF) && !strcmp(head_referent.buf, arg->oldname);
@@ -1777,7 +1789,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * copy over all log entries from the old reflog. Last but not least,
 	 * when renaming we also have to delete all the old reflog entries.
 	 */
-	ret = reftable_stack_init_log_iterator(arg->stack, &it);
+	ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -1850,7 +1862,6 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1864,11 +1875,10 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1882,7 +1892,6 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1895,11 +1904,10 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 5/9] refs/reftable: refactor reading symbolic refs to use reftable backend
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (3 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 6/9] refs/reftable: refactor reflog expiry " Patrick Steinhardt
                     ` (4 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Refactor the callback function that reads symbolic references in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 5933c561f5c422d12b616514ed76b75c52a13477..498bc9f932673e6089bd3b27e1bb7ed8d0e36a4c 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -884,21 +884,18 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_ref_record ref = {0};
 	struct reftable_backend *be;
+	struct object_id oid;
+	unsigned int type = 0;
 	int ret;
 
 	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
-	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
-		strbuf_addstr(referent, ref.value.symref);
-	else
+	ret = reftable_backend_read_ref(be, refname, &oid, referent, &type);
+	if (type != REF_ISSYMREF)
 		ret = -1;
-
-	reftable_ref_record_release(&ref);
 	return ret;
 }
 

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 6/9] refs/reftable: refactor reflog expiry to use reftable backend
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (4 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 5/9] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 7/9] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
                     ` (3 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Refactor the callback function that expires reflog entries in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 498bc9f932673e6089bd3b27e1bb7ed8d0e36a4c..ff49c27a6a0f4cb1e5b3bfaa3d34d3302c1bdb2e 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2444,14 +2444,15 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
-	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
 	struct reftable_backend *be;
 	struct object_id oid = {0};
+	struct strbuf referent = STRBUF_INIT;
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	unsigned int type = 0;
 	int ret;
 
 	if (refs->err < 0)
@@ -2473,12 +2474,9 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
+	ret = reftable_backend_read_ref(be, refname, &oid, &referent, &type);
 	if (ret < 0)
 		goto done;
-	if (reftable_ref_record_val1(&ref_record))
-		oidread(&oid, reftable_ref_record_val1(&ref_record),
-			ref_store->repo->hash_algo);
 	prepare_fn(refname, &oid, policy_cb_data);
 
 	while (1) {
@@ -2545,8 +2543,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		}
 	}
 
-	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash &&
-	    reftable_ref_record_val1(&ref_record))
+	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash && !is_null_oid(&oid))
 		oidread(&arg.update_oid, last_hash, ref_store->repo->hash_algo);
 
 	arg.refs = refs;
@@ -2571,11 +2568,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		cleanup_fn(policy_cb_data);
 	assert(ret != REFTABLE_API_ERROR);
 
-	reftable_ref_record_release(&ref_record);
 	reftable_iterator_destroy(&it);
 	reftable_addition_destroy(add);
 	for (i = 0; i < logs_nr; i++)
 		reftable_log_record_release(&logs[i]);
+	strbuf_release(&referent);
 	free(logs);
 	free(rewritten);
 	return ret;

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 7/9] reftable/stack: add mechanism to notify callers on reload
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (5 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 6/9] refs/reftable: refactor reflog expiry " Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 8/9] reftable/merged: drain priority queue on reseek Patrick Steinhardt
                     ` (2 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Reftable stacks are reloaded in two cases:

  - When calling `reftable_stack_reload()`, if the stat-cache tells us
    that the stack has been modified.

  - When committing a reftable addition.

While callers can figure out the second case, they do not have a
mechanism to figure out whether `reftable_stack_reload()` led to an
actual reload of the on-disk data. Thus all they can do is assume that
data is always being reloaded in that case.

Improve the situation by introducing a new `on_reload()` callback to the
reftable options. If provided, the function will be invoked every time
the stack has indeed been reloaded. This allows callers to invalidate
data that depends on the current stack data.
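
As a minimal sketch of how a caller would hook into this (the cache
type and `cache_clear()` helper below are hypothetical):

    static void on_stack_reload(void *payload)
    {
        struct ref_cache *cache = payload; /* hypothetical cache type */
        cache_clear(cache); /* drop data derived from the old stack */
    }

    struct ref_cache cache = {0}; /* hypothetical */
    struct reftable_stack *stack;
    struct reftable_write_options opts = {
        .on_reload = on_stack_reload,
        .on_reload_payload = &cache,
    };
    err = reftable_new_stack(&stack, path, &opts);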

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reftable-writer.h | 9 +++++++++
 reftable/stack.c           | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h
index c85ef5a5bd14595d75f99457fef4407040e197c5..5f9afa620bb00de66c311765fb0ae8c6f56401ae 100644
--- a/reftable/reftable-writer.h
+++ b/reftable/reftable-writer.h
@@ -68,6 +68,15 @@ struct reftable_write_options {
 	 * fsync(3P) when unset.
 	 */
 	int (*fsync)(int fd);
+
+	/*
+	 * Callback function to execute whenever the stack is being reloaded.
+	 * This can be used e.g. to discard cached information that relies on
+	 * the old stack's data. The payload data will be passed as argument to
+	 * the callback.
+	 */
+	void (*on_reload)(void *payload);
+	void *on_reload_payload;
 };
 
 /* reftable_block_stats holds statistics for a single block type */
diff --git a/reftable/stack.c b/reftable/stack.c
index d97b64a40d4ad05cfd9e6f33e8ba1e713281ef6d..5384ca9de0d1f064aebcb09308a74cc397b37463 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -548,6 +548,10 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
 		close(fd);
 	free_names(names);
 	free_names(names_after);
+
+	if (st->opts.on_reload)
+		st->opts.on_reload(st->opts.on_reload_payload);
+
 	return err;
 }
 

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 8/9] reftable/merged: drain priority queue on reseek
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (6 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 7/9] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  7:38   ` [PATCH v3 9/9] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  2024-11-25  9:47   ` [PATCH v3 0/9] " Christian Couder
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

In 5bf96e0c39 (reftable/generic: move seeking of records into the
iterator, 2024-05-13) we have refactored the reftable codebase such that
iterators can be initialized once and then re-seeked multiple times.
This feature is used by 1869525066 (refs/reftable: wire up support for
exclude patterns, 2024-09-16) in order to skip records based on exclude
patterns provided by the caller.

The logic to re-seek the merged iterator is insufficient, though, because
we don't drain the priority queue on a re-seek. This means that the
queue may contain stale entries, and thus reading the next record from
the queue will return the wrong entry. While this is an obvious bug, it
is harmless in the context of the above exclude patterns:

  - If the queue contained stale entries that match the pattern then the
    caller would already know to filter out such refs. This is because
    our codebase is prepared to handle backends that don't have a way to
    efficiently implement exclude patterns.

  - If the queue contained stale entries that don't match the pattern
    we'd eventually filter out any duplicates. This is because the
    reftable code discards items with the same ref name and sorts any
    remaining entries properly.

So things happen to work in this context regardless of the bug, and
there is no other use case yet where we re-seek iterators. We're about
to introduce a caching mechanism, though, where iterators are reused by
the reftable backend, and that will expose the bug.

Fix the issue by draining the priority queue when seeking and add a
testcase that surfaces the issue.
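
To illustrate with the setup of the new testcase, where one table holds
the refs "a" and "c" and another holds "b" and "d":

    reftable_iterator_seek_ref(&it, "b");
    reftable_iterator_next_ref(&it, &rec); /* yields "b"; entries >= "b"
                                              remain queued */
    reftable_iterator_seek_ref(&it, "a");
    reftable_iterator_next_ref(&it, &rec); /* must yield "a"; without
                                              draining, a stale queued
                                              entry could be returned */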

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/merged.c                |  2 ++
 t/unit-tests/t-reftable-merged.c | 73 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/reftable/merged.c b/reftable/merged.c
index 5b93e20f42945300abbc1a036bbdf067fced7854..bb0836e3443271f9c0d5ba5582c78694d437ddc2 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -66,6 +66,8 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want
 	int err;
 
 	mi->advance_index = -1;
+	while (!merged_iter_pqueue_is_empty(mi->pq))
+		merged_iter_pqueue_remove(&mi->pq);
 
 	for (size_t i = 0; i < mi->subiters_len; i++) {
 		err = iterator_seek(&mi->subiters[i].iter, want);
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 2591b5e59745536a205271491f747875e04c5a3f..a12bd0e1a3bdeda82bcbc6259e679df4f232e3e2 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -273,6 +273,78 @@ static void t_merged_seek_multiple_times(void)
 	reftable_free(sources);
 }
 
+static void t_merged_seek_multiple_times_without_draining(void)
+{
+	struct reftable_ref_record r1[] = {
+		{
+			.refname = (char *) "a",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 1 },
+		},
+		{
+			.refname = (char *) "c",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 2 },
+		}
+	};
+	struct reftable_ref_record r2[] = {
+		{
+			.refname = (char *) "b",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 3 },
+		},
+		{
+			.refname = (char *) "d",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 4 },
+		},
+	};
+	struct reftable_ref_record *refs[] = {
+		r1, r2,
+	};
+	size_t sizes[] = {
+		ARRAY_SIZE(r1), ARRAY_SIZE(r2),
+	};
+	struct reftable_buf bufs[] = {
+		REFTABLE_BUF_INIT, REFTABLE_BUF_INIT,
+	};
+	struct reftable_block_source *sources = NULL;
+	struct reftable_reader **readers = NULL;
+	struct reftable_ref_record rec = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_merged_table *mt;
+	int err;
+
+	mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2);
+	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
+
+	err = reftable_iterator_seek_ref(&it, "b");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
+	check(err == 1);
+
+	err = reftable_iterator_seek_ref(&it, "a");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
+	check(err == 1);
+
+	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
+		reftable_buf_release(&bufs[i]);
+	readers_destroy(readers, ARRAY_SIZE(refs));
+	reftable_ref_record_release(&rec);
+	reftable_iterator_destroy(&it);
+	reftable_merged_table_free(mt);
+	reftable_free(sources);
+}
+
 static struct reftable_merged_table *
 merged_table_from_log_records(struct reftable_log_record **logs,
 			      struct reftable_block_source **source,
@@ -467,6 +539,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
 	TEST(t_merged_logs(), "merged table with multiple log updates for same ref");
 	TEST(t_merged_refs(), "merged table with multiple updates to same ref");
 	TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times");
+	TEST(t_merged_seek_multiple_times_without_draining(), "merged table can seek multiple times without draining");
 	TEST(t_merged_single_record(), "ref occurring in only one record can be fetched");
 
 	return test_done();

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v3 9/9] refs/reftable: reuse iterators when reading refs
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (7 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 8/9] reftable/merged: drain priority queue on reseek Patrick Steinhardt
@ 2024-11-25  7:38   ` Patrick Steinhardt
  2024-11-25  9:47   ` [PATCH v3 0/9] " Christian Couder
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  7:38 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

When reading references the reftable backend has to:

  1. Create a new ref iterator.

  2. Seek the iterator to the record we're searching for.

  3. Read the record.

We cannot really avoid the last two steps, but re-creating the iterator
every single time we want to read a reference is expensive and a waste
of resources. We couldn't help it in the past, though, because it
was not possible to reuse iterators. But starting with 5bf96e0c39
(reftable/generic: move seeking of records into the iterator,
2024-05-13) we have split up the iterator lifecycle such that creating
the iterator and seeking are two different concerns.
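
In other words, the backend can now set up the iterator once and then
re-seek it for every subsequent read, roughly like this:

    /* once, until the stack is reloaded */
    ret = reftable_stack_init_ref_iterator(be->stack, &be->it);

    /* for every random read */
    ret = reftable_iterator_seek_ref(&be->it, refname);
    if (!ret)
        ret = reftable_iterator_next_ref(&be->it, &ref);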

Refactor the code such that we cache iterators in the reftable backend.
This cache is invalidated whenever the respective stack is reloaded such
that we know to recreate the iterator in that case. This leads to a
sizeable speedup when creating many refs, which requires a lot of random
reference reads:

    Benchmark 1: update-ref: create many refs (refcount = 100000, revision = master)
      Time (mean ± σ):      1.793 s ±  0.010 s    [User: 0.954 s, System: 0.835 s]
      Range (min … max):    1.781 s …  1.811 s    10 runs

    Benchmark 2: update-ref: create many refs (refcount = 100000, revision = HEAD)
      Time (mean ± σ):      1.680 s ±  0.013 s    [User: 0.846 s, System: 0.831 s]
      Range (min … max):    1.664 s …  1.702 s    10 runs

    Summary
      update-ref: create many refs (refcount = 100000, revision = HEAD) ran
        1.07 ± 0.01 times faster than update-ref: create many refs (refcount = 100000, revision = master)

While 7% is not a huge win, you have to consider that the benchmark is
_writing_ data, so _reading_ references is only one part of what we do.
Flame graphs show that we spend around 40% of our time reading refs, so
the speedup when reading refs is roughly 2.5x that: a 7% end-to-end win
concentrated in 40% of the runtime amounts to about 0.07 / 0.40 = 17.5%
for the reads themselves. I could not
find better benchmarks where we perform a lot of random ref reads.

You can also see a sizeable impact on memory usage when creating 100k
references. Before this change:

    HEAP SUMMARY:
        in use at exit: 19,112,538 bytes in 200,170 blocks
      total heap usage: 8,400,426 allocs, 8,200,256 frees, 454,367,048 bytes allocated

After this change:

    HEAP SUMMARY:
        in use at exit: 674,416 bytes in 169 blocks
      total heap usage: 7,929,872 allocs, 7,929,703 frees, 281,509,985 bytes allocated

As an additional factor, this refactoring opens up the possibility for
more performance optimizations in how we re-seek iterators. Any change
that allows us to optimize re-seeking by e.g. reusing data structures
would thus also directly speed up random reads.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index ff49c27a6a0f4cb1e5b3bfaa3d34d3302c1bdb2e..417c6c4955672ef69fe956e2e6dd8dbd1dd15a3c 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -36,19 +36,30 @@
 
 struct reftable_backend {
 	struct reftable_stack *stack;
+	struct reftable_iterator it;
 };
 
+static void reftable_backend_on_reload(void *payload)
+{
+	struct reftable_backend *be = payload;
+	reftable_iterator_destroy(&be->it);
+}
+
 static int reftable_backend_init(struct reftable_backend *be,
 				 const char *path,
-				 const struct reftable_write_options *opts)
+				 const struct reftable_write_options *_opts)
 {
-	return reftable_new_stack(&be->stack, path, opts);
+	struct reftable_write_options opts = *_opts;
+	opts.on_reload = reftable_backend_on_reload;
+	opts.on_reload_payload = be;
+	return reftable_new_stack(&be->stack, path, &opts);
 }
 
 static void reftable_backend_release(struct reftable_backend *be)
 {
 	reftable_stack_destroy(be->stack);
 	be->stack = NULL;
+	reftable_iterator_destroy(&be->it);
 }
 
 static int reftable_backend_read_ref(struct reftable_backend *be,
@@ -60,10 +71,25 @@ static int reftable_backend_read_ref(struct reftable_backend *be,
 	struct reftable_ref_record ref = {0};
 	int ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (!be->it.ops) {
+		ret = reftable_stack_init_ref_iterator(be->stack, &be->it);
+		if (ret)
+			goto done;
+	}
+
+	ret = reftable_iterator_seek_ref(&be->it, refname);
 	if (ret)
 		goto done;
 
+	ret = reftable_iterator_next_ref(&be->it, &ref);
+	if (ret)
+		goto done;
+
+	if (strcmp(ref.refname, refname)) {
+		ret = 1;
+		goto done;
+	}
+
 	if (ref.value_type == REFTABLE_REF_SYMREF) {
 		strbuf_reset(referent);
 		strbuf_addstr(referent, ref.value.symref);

-- 
2.47.0.274.g962d0b743d.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 0/9] refs/reftable: reuse iterators when reading refs
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
                     ` (8 preceding siblings ...)
  2024-11-25  7:38   ` [PATCH v3 9/9] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
@ 2024-11-25  9:47   ` Christian Couder
  2024-11-25  9:52     ` Patrick Steinhardt
  9 siblings, 1 reply; 57+ messages in thread
From: Christian Couder @ 2024-11-25  9:47 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, karthik nayak, Junio C Hamano

Hi,

On Mon, Nov 25, 2024 at 8:38 AM Patrick Steinhardt <ps@pks.im> wrote:
>
> Hi,
>
> this is the second version of my patch series that refactors the

Do you mean that despite the "v3" mark, it's only the second version
because "v2" was just rebasing "v1" on top of a better base?

> reftable backend to reuse iterators when reading random references. This
> removes the overhead of having to recreate the iterator on every read
> and thus leads to better performance and less allocation churn.
>
> Changes in v3:
>
>   - Adapt some comments to refer to the "backend" instead of to the
>     "stack".
>   - Fix indentation of a statement while at it.
>   - Explain why callsites don't want to reload the stack.
>   - Optimize `prepare_transaction_update()` by not using `backend_for()`
>     twice, but instead reload the stack manually.
>   - Split out the change that adds `reftable_stack_hash_id()` into a
>     separate commit.
>   - Link to v2: https://lore.kernel.org/r/cover.1730792627.git.ps@pks.im

Thanks!

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 0/9] refs/reftable: reuse iterators when reading refs
  2024-11-25  9:47   ` [PATCH v3 0/9] " Christian Couder
@ 2024-11-25  9:52     ` Patrick Steinhardt
  0 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-25  9:52 UTC (permalink / raw)
  To: Christian Couder; +Cc: git, karthik nayak, Junio C Hamano

On Mon, Nov 25, 2024 at 10:47:57AM +0100, Christian Couder wrote:
> Hi,
> 
> On Mon, Nov 25, 2024 at 8:38 AM Patrick Steinhardt <ps@pks.im> wrote:
> >
> > Hi,
> >
> > this is the second version of my patch series that refactors the
> 
> Do you mean that despite the "v3" mark, it's only the second version
> because "v2" was just rebasing "v1" on top of a better base?

Nah, I simply forgot to update this introduction :)

Patrick

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 2/9] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-25  7:38   ` [PATCH v3 2/9] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-26  0:31     ` Junio C Hamano
  0 siblings, 0 replies; 57+ messages in thread
From: Junio C Hamano @ 2024-11-26  0:31 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, karthik nayak

Patrick Steinhardt <ps@pks.im> writes:

> +	 * On the other hand, reloading that stack without locking it feels
> +	 * wrong to, as the value of "HEAD" could be modified concurrently at
> +	 * any point in time.
> +	 */

"wrong to," -> "wrong too,", probably.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend`
  2024-11-25  7:38   ` [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-26  0:48     ` Junio C Hamano
  2024-11-26  6:41       ` Patrick Steinhardt
  0 siblings, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2024-11-26  0:48 UTC (permalink / raw)
  To: Patrick Steinhardt; +Cc: git, karthik nayak

Patrick Steinhardt <ps@pks.im> writes:

> Refactor `read_ref_without_reload()` to accept a `struct reftable_stack`
> as input instead of accepting a `struct reftable_stack`.

Hmph...  am I supposed to be puzzled that the patch replaces A with
A?

All callers of read_ref_without_reload() now call
reftable_backend_read_ref().  

The former took <reftable_ref_store, reftable_stack> while the
latter takes <reftable_backend>.  They both take <refname, oid,
referent, type>, of course, because the former is replaced by the
latter.

OK, so we introduce a new function, and instead of passing ref-store
and stack, the new function only takes reftable-backend (which has a
stack contained in it).

The old function used ref-store only to find out the hash algorithm
via its base repository.  Since the hash algorithm can be found from
the stack that is in the backend, the new function does not take a
ref-store.  FWIW, the old function did not have to take one either,
but since we are getting rid of the old function altogether, that is
fine ;-)

> This allows us
> to implement an additional caching layer when reading refs where we can
> reuse reftable iterators.

OK.

>

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend`
  2024-11-26  0:48     ` Junio C Hamano
@ 2024-11-26  6:41       ` Patrick Steinhardt
  0 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, karthik nayak

On Tue, Nov 26, 2024 at 09:48:49AM +0900, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> 
> > Refactor `read_ref_without_reload()` to accept a `struct reftable_stack`
> > as input instead of accepting a `struct reftable_stack`.
> 
> Hmph...  am I supposed to be puzzled that the patch replaces A with
> A?

Oops, no. The first one should of course be `struct reftable_backend`.

> All callers of read_ref_without_reload() now call
> reftable_backend_read_ref().  
> 
> The former took <reftable_ref_store, reftable_stack> while the
> latter take <reftable_backend>.  They both take <refname, oid,
> referent, type>, of course, because the former is replaced by the
> latter.
> 
> OK, so we introduce a new function, and instead of passing ref-store
> and stack, the new function only takes reftable-backend (which has a
> stack contained in it).

Yes.

> The old function used ref-store only to find out the hash algorithm
> via its base repository.  Since the hash algorithm can be found from
> the stack that is in the backend, the new function does not take a
> ref-store.  FWIW, the old function did not have to take one either,
> but since we are getting rid of the old function altogether, that is
> fine ;-)

You know, let me maybe split out this change into a separate commit.
With your comments it's rather obvious that this commit does too many
things at once.

Patrick

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 00/10] refs/reftable: reuse iterators when reading refs
  2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
                   ` (10 preceding siblings ...)
  2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
@ 2024-11-26  6:42 ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 01/10] refs/reftable: encapsulate reftable stack Patrick Steinhardt
                     ` (9 more replies)
  11 siblings, 10 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Hi,

this patch series refactors the reftable backend to reuse reftable
iterators when reading random references. This removes the overhead of
having to recreate the iterator on every read and thus leads to better
performance and less allocation churn. It also gives us the ability to
further optimize reads by optimizing re-seeking iterators in the future.

Overall this leads to a 7% speedup when creating many refs in a
transaction, which performs many random reads. But this change also
positively impacts other usecases.

Changes in v3:

  - Adapt some comments to refer to the "backend" instead of to the
    "stack".
  - Fix indentation of a statement while at it.
  - Explain why callsites don't want to reload the stack.
  - Optimize `prepare_transaction_update()` by not using `backend_for()`
    twice, but instead reload the stack manually.
  - Split out the change that adds `reftable_stack_hash_id()` into a
    separate commit.
  - Link to v2: https://lore.kernel.org/r/cover.1730792627.git.ps@pks.im

Changes in v4:

  - Split up the introduction of `reftable_backend_read_ref()` into two
    commits: one that gets rid of the `struct reftable_ref_store`
    parameter and one that converts the function to accept a `struct
    reftable_backend`.
  - Fix a comment typo.
  - Link to v3: https://lore.kernel.org/r/20241125-pks-reftable-backend-reuse-iter-v3-0-1d7b658e3e9e@pks.im

Thanks!

Patrick

---
Patrick Steinhardt (10):
      refs/reftable: encapsulate reftable stack
      refs/reftable: handle reloading stacks in the reftable backend
      reftable/stack: add accessor for the hash ID
      refs/reftable: figure out hash via `reftable_stack`
      refs/reftable: read references via `struct reftable_backend`
      refs/reftable: refactor reading symbolic refs to use reftable backend
      refs/reftable: refactor reflog expiry to use reftable backend
      reftable/stack: add mechanism to notify callers on reload
      reftable/merged: drain priority queue on reseek
      refs/reftable: reuse iterators when reading refs

 refs/reftable-backend.c          | 409 +++++++++++++++++++++++++--------------
 reftable/merged.c                |   2 +
 reftable/reftable-stack.h        |   3 +
 reftable/reftable-writer.h       |   9 +
 reftable/stack.c                 |   9 +
 t/unit-tests/t-reftable-merged.c |  73 +++++++
 6 files changed, 357 insertions(+), 148 deletions(-)

Range-diff versus v3:

 1:  3db0ba3eb5 =  1:  ec0b7e35c8 refs/reftable: encapsulate reftable stack
 2:  556eb8301c !  2:  437304908b refs/reftable: handle reloading stacks in the reftable backend
    @@ refs/reftable-backend.c: static int reftable_be_transaction_prepare(struct ref_s
     +	 * have reloaded it, which may mean that it is stale.
     +	 *
     +	 * On the other hand, reloading that stack without locking it feels
    -+	 * wrong to, as the value of "HEAD" could be modified concurrently at
    ++	 * wrong, too, as the value of "HEAD" could be modified concurrently at
     +	 * any point in time.
     +	 */
     +	ret = backend_for(&be, refs, "HEAD", NULL, 0);
 3:  e966b9acf2 =  3:  0bd9aea884 reftable/stack: add accessor for the hash ID
 -:  ---------- >  4:  d8b801503f refs/reftable: figure out hash via `reftable_stack`
 4:  de9f52a5b2 !  5:  a57210ac46 refs/reftable: read references via `struct reftable_backend`
    @@ Metadata
      ## Commit message ##
         refs/reftable: read references via `struct reftable_backend`
     
    -    Refactor `read_ref_without_reload()` to accept a `struct reftable_stack`
    -    as input instead of accepting a `struct reftable_stack`. This allows us
    -    to implement an additional caching layer when reading refs where we can
    -    reuse reftable iterators.
    +    Refactor `read_ref_without_reload()` to accept `struct reftable_backend`
    +    as parameter instead of `struct reftable_stack`. Rename the function to
    +    `reftable_backend_read_ref()` to clarify its scope and move it close to
    +    other functions operating on `struct reftable_backend`.
    +
    +    This change allows us to implement an additional caching layer when
    +    reading refs where we can reuse reftable iterators.
     
         Signed-off-by: Patrick Steinhardt <ps@pks.im>
     
    @@ refs/reftable-backend.c: static void fill_reftable_log_record(struct reftable_lo
      	log->value.update.tz_offset = sign * atoi(tz_begin);
      }
      
    --static int read_ref_without_reload(struct reftable_ref_store *refs,
    --				   struct reftable_stack *stack,
    +-static int read_ref_without_reload(struct reftable_stack *stack,
     -				   const char *refname,
     -				   struct object_id *oid,
     -				   struct strbuf *referent,
    @@ refs/reftable-backend.c: static void fill_reftable_log_record(struct reftable_lo
     -		strbuf_addstr(referent, ref.value.symref);
     -		*type |= REF_ISSYMREF;
     -	} else if (reftable_ref_record_val1(&ref)) {
    +-		unsigned int hash_id;
    +-
    +-		switch (reftable_stack_hash_id(stack)) {
    +-		case REFTABLE_HASH_SHA1:
    +-			hash_id = GIT_HASH_SHA1;
    +-			break;
    +-		case REFTABLE_HASH_SHA256:
    +-			hash_id = GIT_HASH_SHA256;
    +-			break;
    +-		default:
    +-			BUG("unhandled hash ID %d", reftable_stack_hash_id(stack));
    +-		}
    +-
     -		oidread(oid, reftable_ref_record_val1(&ref),
    --			refs->base.repo->hash_algo);
    +-			&hash_algos[hash_id]);
     -	} else {
     -		/* We got a tombstone, which should not happen. */
     -		BUG("unhandled reference value type %d", ref.value_type);
    @@ refs/reftable-backend.c: static int reftable_be_read_raw_ref(struct ref_store *r
      	if (ret)
      		return ret;
      
    --	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
    +-	ret = read_ref_without_reload(be->stack, refname, oid, referent, type);
     +	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
      	if (ret < 0)
      		return ret;
    @@ refs/reftable-backend.c: static int reftable_be_transaction_prepare(struct ref_s
      	if (ret)
      		goto done;
      
    --	ret = read_ref_without_reload(refs, be->stack, "HEAD",
    +-	ret = read_ref_without_reload(be->stack, "HEAD",
     -				      &head_oid, &head_referent, &head_type);
     +	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
     +					&head_referent, &head_type);
    @@ refs/reftable-backend.c: static int reftable_be_transaction_prepare(struct ref_s
      			string_list_insert(&affected_refnames, new_update->refname);
      		}
      
    --		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
    +-		ret = read_ref_without_reload(be->stack, rewritten_ref,
     -					      &current_oid, &referent, &u->type);
     +		ret = reftable_backend_read_ref(be, rewritten_ref,
     +						&current_oid, &referent, &u->type);
    @@ refs/reftable-backend.c: static int write_copy_table(struct reftable_writer *wri
      		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
      		logs_nr++;
      
    --		ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid,
    +-		ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid,
     -					      &head_referent, &head_type);
     +		ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid,
     +						&head_referent, &head_type);
 5:  6e7db8e3fc =  6:  432f75ef01 refs/reftable: refactor reading symbolic refs to use reftable backend
 6:  687afb781a =  7:  1ca28e00b1 refs/reftable: refactor reflog expiry to use reftable backend
 7:  e1d4bdc3e8 =  8:  a95b67f1f3 reftable/stack: add mechanism to notify callers on reload
 8:  8eccf308bd =  9:  cbaf95ff15 reftable/merged: drain priority queue on reseek
 9:  942957447a = 10:  013f05e08b refs/reftable: reuse iterators when reading refs

---
base-commit: 455ddbf8c6a694968c1089fb6c7ffb1d31d97e9d
change-id: 20241125-pks-reftable-backend-reuse-iter-3a2e92428789


^ permalink raw reply	[flat|nested] 57+ messages in thread

* [PATCH v4 01/10] refs/reftable: encapsulate reftable stack
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 02/10] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
                     ` (8 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

The reftable ref store needs to keep track of multiple stacks, one for
the main worktree and an arbitrary number of stacks for worktrees. This
is done by storing pointers to `struct reftable_stack`, which we then
access directly.

Wrap the stack in a new `struct reftable_backend`. This will allow us to
attach more data to each respective stack in subsequent commits.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 135 +++++++++++++++++++++++++++---------------------
 1 file changed, 76 insertions(+), 59 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index f560bc2b67857d785294e6b5699383a256f30813..acd26f8928d18396f78a2d39ad0e0c1796d5a409 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -34,24 +34,41 @@
  */
 #define REF_UPDATE_VIA_HEAD (1 << 8)
 
+struct reftable_backend {
+	struct reftable_stack *stack;
+};
+
+static int reftable_backend_init(struct reftable_backend *be,
+				 const char *path,
+				 const struct reftable_write_options *opts)
+{
+	return reftable_new_stack(&be->stack, path, opts);
+}
+
+static void reftable_backend_release(struct reftable_backend *be)
+{
+	reftable_stack_destroy(be->stack);
+	be->stack = NULL;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
 	/*
-	 * The main stack refers to the common dir and thus contains common
+	 * The main backend refers to the common dir and thus contains common
 	 * refs as well as refs of the main repository.
 	 */
-	struct reftable_stack *main_stack;
+	struct reftable_backend main_backend;
 	/*
-	 * The worktree stack refers to the gitdir in case the refdb is opened
+	 * The worktree backend refers to the gitdir in case the refdb is opened
 	 * via a worktree. It thus contains the per-worktree refs.
 	 */
-	struct reftable_stack *worktree_stack;
+	struct reftable_backend worktree_backend;
 	/*
-	 * Map of worktree stacks by their respective worktree names. The map
+	 * Map of worktree backends by their respective worktree names. The map
 	 * is populated lazily when we try to resolve `worktrees/$worktree` refs.
 	 */
-	struct strmap worktree_stacks;
+	struct strmap worktree_backends;
 	struct reftable_write_options write_options;
 
 	unsigned int store_flags;
@@ -97,21 +114,21 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_stack *stack_for(struct reftable_ref_store *store,
-					const char *refname,
-					const char **rewritten_ref)
+static struct reftable_backend *backend_for(struct reftable_ref_store *store,
+					    const char *refname,
+					    const char **rewritten_ref)
 {
 	const char *wtname;
 	int wtname_len;
 
 	if (!refname)
-		return store->main_stack;
+		return &store->main_backend;
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_stack *stack;
+		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -125,37 +142,39 @@ static struct reftable_stack *stack_for(struct reftable_ref_store *store,
 		/*
 		 * There is an edge case here: when the worktree references the
 		 * current worktree, then we set up the stack once via
-		 * `worktree_stacks` and once via `worktree_stack`. This is
+		 * `worktree_backends` and once via `worktree_backend`. This is
 		 * wasteful, but in the reading case it shouldn't matter. And
 		 * in the writing case we would notice that the stack is locked
 		 * already and error out when trying to write a reference via
 		 * both stacks.
 		 */
-		stack = strmap_get(&store->worktree_stacks, wtname_buf.buf);
-		if (!stack) {
+		be = strmap_get(&store->worktree_backends, wtname_buf.buf);
+		if (!be) {
 			strbuf_addf(&wt_dir, "%s/worktrees/%s/reftable",
 				    store->base.repo->commondir, wtname_buf.buf);
 
-			store->err = reftable_new_stack(&stack, wt_dir.buf,
-							&store->write_options);
+			CALLOC_ARRAY(be, 1);
+			store->err = reftable_backend_init(be, wt_dir.buf,
+							   &store->write_options);
 			assert(store->err != REFTABLE_API_ERROR);
-			strmap_put(&store->worktree_stacks, wtname_buf.buf, stack);
+
+			strmap_put(&store->worktree_backends, wtname_buf.buf, be);
 		}
 
 		strbuf_release(&wt_dir);
-		return stack;
+		return be;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
 		 * If there is no worktree stack then we're currently in the
 		 * main worktree. We thus return the main stack in that case.
 		 */
-		if (!store->worktree_stack)
-			return store->main_stack;
-		return store->worktree_stack;
+		if (!store->worktree_backend.stack)
+			return &store->main_backend;
+		return &store->worktree_backend;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return store->main_stack;
+		return &store->main_backend;
 	default:
 		BUG("unhandled worktree reference type");
 	}
@@ -292,7 +311,7 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 	umask(mask);
 
 	base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable);
-	strmap_init(&refs->worktree_stacks);
+	strmap_init(&refs->worktree_backends);
 	refs->store_flags = store_flags;
 	refs->log_all_ref_updates = repo_settings_get_log_all_ref_updates(repo);
 
@@ -337,8 +356,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_realpath(&path, gitdir, 0);
 	}
 	strbuf_addstr(&path, "/reftable");
-	refs->err = reftable_new_stack(&refs->main_stack, path.buf,
-				       &refs->write_options);
+	refs->err = reftable_backend_init(&refs->main_backend, path.buf,
+					  &refs->write_options);
 	if (refs->err)
 		goto done;
 
@@ -354,8 +373,8 @@ static struct ref_store *reftable_be_init(struct repository *repo,
 		strbuf_reset(&path);
 		strbuf_addf(&path, "%s/reftable", gitdir);
 
-		refs->err = reftable_new_stack(&refs->worktree_stack, path.buf,
-					       &refs->write_options);
+		refs->err = reftable_backend_init(&refs->worktree_backend, path.buf,
+						  &refs->write_options);
 		if (refs->err)
 			goto done;
 	}
@@ -374,19 +393,17 @@ static void reftable_be_release(struct ref_store *ref_store)
 	struct strmap_entry *entry;
 	struct hashmap_iter iter;
 
-	if (refs->main_stack) {
-		reftable_stack_destroy(refs->main_stack);
-		refs->main_stack = NULL;
-	}
+	if (refs->main_backend.stack)
+		reftable_backend_release(&refs->main_backend);
+	if (refs->worktree_backend.stack)
+		reftable_backend_release(&refs->worktree_backend);
 
-	if (refs->worktree_stack) {
-		reftable_stack_destroy(refs->worktree_stack);
-		refs->worktree_stack = NULL;
+	strmap_for_each_entry(&refs->worktree_backends, &iter, entry) {
+		struct reftable_backend *be = entry->value;
+		reftable_backend_release(be);
+		free(be);
 	}
-
-	strmap_for_each_entry(&refs->worktree_stacks, &iter, entry)
-		reftable_stack_destroy(entry->value);
-	strmap_clear(&refs->worktree_stacks, 0);
+	strmap_clear(&refs->worktree_backends, 0);
 }
 
 static int reftable_be_create_on_disk(struct ref_store *ref_store,
@@ -781,7 +798,7 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 		required_flags |= REF_STORE_ODB;
 	refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin");
 
-	main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix,
+	main_iter = ref_iterator_for_stack(refs, refs->main_backend.stack, prefix,
 					   exclude_patterns, flags);
 
 	/*
@@ -789,14 +806,14 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto
 	 * right now. If we aren't, then we return the common reftable
 	 * iterator, only.
 	 */
-	 if (!refs->worktree_stack)
+	if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
 	/*
 	 * Otherwise we merge both the common and the per-worktree refs into a
 	 * single iterator.
 	 */
-	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix,
+	worktree_iter = ref_iterator_for_stack(refs, refs->worktree_backend.stack, prefix,
 					       exclude_patterns, flags);
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -811,7 +828,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	int ret;
 
 	if (refs->err < 0)
@@ -838,7 +855,7 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
 	int ret;
 
@@ -898,7 +915,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = stack_for(refs, update->refname, NULL);
+	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
 	size_t i;
 	int ret;
@@ -1031,7 +1048,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, stack_for(refs, "HEAD", NULL), "HEAD",
+	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1043,7 +1060,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = stack_for(refs, u->refname, &rewritten_ref);
+		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1525,9 +1542,9 @@ static int reftable_be_pack_refs(struct ref_store *ref_store,
 	if (refs->err)
 		return refs->err;
 
-	stack = refs->worktree_stack;
+	stack = refs->worktree_backend.stack;
 	if (!stack)
-		stack = refs->main_stack;
+		stack = refs->main_backend.stack;
 
 	if (opts->flags & PACK_REFS_AUTO)
 		ret = reftable_stack_auto_compact(stack);
@@ -1782,7 +1799,7 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1814,7 +1831,7 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname);
+	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -1952,11 +1969,11 @@ static struct ref_iterator *reftable_be_reflog_iterator_begin(struct ref_store *
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_iterator_begin");
 	struct reftable_reflog_iterator *main_iter, *worktree_iter;
 
-	main_iter = reflog_iterator_for_stack(refs, refs->main_stack);
-	if (!refs->worktree_stack)
+	main_iter = reflog_iterator_for_stack(refs, refs->main_backend.stack);
+	if (!refs->worktree_backend.stack)
 		return &main_iter->base;
 
-	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_stack);
+	worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_backend.stack);
 
 	return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base,
 					ref_iterator_select, NULL);
@@ -1995,7 +2012,7 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2035,7 +2052,7 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
 	size_t logs_alloc = 0, logs_nr = 0, i;
@@ -2084,7 +2101,7 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
 	int ret;
@@ -2169,7 +2186,7 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
 		.stack = stack,
@@ -2243,7 +2260,7 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct write_reflog_delete_arg arg = {
 		.stack = stack,
 		.refname = refname,
@@ -2352,7 +2369,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = stack_for(refs, refname, &refname);
+	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 02/10] refs/reftable: handle reloading stacks in the reftable backend
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 01/10] refs/reftable: encapsulate reftable stack Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 03/10] reftable/stack: add accessor for the hash ID Patrick Steinhardt
                     ` (7 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

When accessing a stack we almost always have to reload the stack before
reading data from it. This is mostly because Git does not have a
notification mechanism for when underlying data has been changed, and
thus we are forced to opportunistically reload the stack every single
time to account for any changes that may have happened concurrently.

Handle the reload internally in `backend_for()`. For one, this forces
callsites to think about whether or not they need to reload the stack.
And second, it makes the logic to access stacks more self-contained by
letting each `struct reftable_backend` manage itself.

Update callsites where we don't reload the stack to document why we
don't. In some cases it's unclear whether it is the right thing to do in
the first place, but fixing that is outside of the scope of this patch
series.
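
The call pattern after this change thus becomes (as in the diff below):

    ret = backend_for(&be, refs, refname, &refname, 1 /* reload */);
    if (ret)
        return ret;
    /* be->stack is now known to be up to date */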

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 184 +++++++++++++++++++++++++++++++++---------------
 1 file changed, 126 insertions(+), 58 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index acd26f8928d18396f78a2d39ad0e0c1796d5a409..8be9cc43c990212b0b4f8d0b473f84270ff4787d 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -114,21 +114,25 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto
  * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store
  * those references in their normalized form.
  */
-static struct reftable_backend *backend_for(struct reftable_ref_store *store,
-					    const char *refname,
-					    const char **rewritten_ref)
+static int backend_for(struct reftable_backend **out,
+		       struct reftable_ref_store *store,
+		       const char *refname,
+		       const char **rewritten_ref,
+		       int reload)
 {
+	struct reftable_backend *be;
 	const char *wtname;
 	int wtname_len;
 
-	if (!refname)
-		return &store->main_backend;
+	if (!refname) {
+		be = &store->main_backend;
+		goto out;
+	}
 
 	switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) {
 	case REF_WORKTREE_OTHER: {
 		static struct strbuf wtname_buf = STRBUF_INIT;
 		struct strbuf wt_dir = STRBUF_INIT;
-		struct reftable_backend *be;
 
 		/*
 		 * We're using a static buffer here so that we don't need to
@@ -162,7 +166,7 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		}
 
 		strbuf_release(&wt_dir);
-		return be;
+		goto out;
 	}
 	case REF_WORKTREE_CURRENT:
 		/*
@@ -170,14 +174,27 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store,
 		 * main worktree. We thus return the main stack in that case.
 		 */
 		if (!store->worktree_backend.stack)
-			return &store->main_backend;
-		return &store->worktree_backend;
+			be = &store->main_backend;
+		else
+			be = &store->worktree_backend;
+		goto out;
 	case REF_WORKTREE_MAIN:
 	case REF_WORKTREE_SHARED:
-		return &store->main_backend;
+		be = &store->main_backend;
+		goto out;
 	default:
 		BUG("unhandled worktree reference type");
 	}
+
+out:
+	if (reload) {
+		int ret = reftable_stack_reload(be->stack);
+		if (ret)
+			return ret;
+	}
+	*out = be;
+
+	return 0;
 }
 
 static int should_write_log(struct reftable_ref_store *refs, const char *refname)
@@ -828,17 +845,17 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, stack, refname, oid, referent, type);
+	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -855,15 +872,15 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_ref_record ref = {0};
+	struct reftable_backend *be;
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
 	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
 		strbuf_addstr(referent, ref.value.symref);
 	else
@@ -880,7 +897,7 @@ struct reftable_transaction_update {
 
 struct write_transaction_table_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	struct reftable_addition *addition;
 	struct reftable_transaction_update *updates;
 	size_t updates_nr;
@@ -915,27 +932,37 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 				      struct ref_update *update,
 				      struct strbuf *err)
 {
-	struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack;
 	struct write_transaction_table_arg *arg = NULL;
+	struct reftable_backend *be;
 	size_t i;
 	int ret;
 
+	/*
+	 * This function gets called in a loop, and we don't want to repeatedly
+	 * reload the stack for every single ref update. Instead, we manually
+	 * reload further down in the case where we haven't yet prepared the
+	 * specific `reftable_backend`.
+	 */
+	ret = backend_for(&be, refs, update->refname, NULL, 0);
+	if (ret)
+		return ret;
+
 	/*
 	 * Search for a preexisting stack update. If there is one then we add
 	 * the update to it, otherwise we set up a new stack update.
 	 */
 	for (i = 0; !arg && i < tx_data->args_nr; i++)
-		if (tx_data->args[i].stack == stack)
+		if (tx_data->args[i].be == be)
 			arg = &tx_data->args[i];
 
 	if (!arg) {
 		struct reftable_addition *addition;
 
-		ret = reftable_stack_reload(stack);
+		ret = reftable_stack_reload(be->stack);
 		if (ret)
 			return ret;
 
-		ret = reftable_stack_new_addition(&addition, stack,
+		ret = reftable_stack_new_addition(&addition, be->stack,
 						  REFTABLE_STACK_NEW_ADDITION_RELOAD);
 		if (ret) {
 			if (ret == REFTABLE_LOCK_ERROR)
@@ -947,7 +974,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out,
 			   tx_data->args_alloc);
 		arg = &tx_data->args[tx_data->args_nr++];
 		arg->refs = refs;
-		arg->stack = stack;
+		arg->be = be;
 		arg->addition = addition;
 		arg->updates = NULL;
 		arg->updates_nr = 0;
@@ -1002,6 +1029,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	struct strbuf referent = STRBUF_INIT, head_referent = STRBUF_INIT;
 	struct string_list affected_refnames = STRING_LIST_INIT_NODUP;
 	struct reftable_transaction_data *tx_data = NULL;
+	struct reftable_backend *be;
 	struct object_id head_oid;
 	unsigned int head_type = 0;
 	size_t i;
@@ -1048,7 +1076,22 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 		goto done;
 	}
 
-	ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD",
+	/*
+	 * TODO: it's dubious whether we should reload the stack that "HEAD"
+	 * belongs to or not. In theory, it may happen that we only modify
+	 * stacks which are _not_ part of the "HEAD" stack. In that case we
+	 * wouldn't have prepared any transaction for its stack and would not
+	 * have reloaded it, which may mean that it is stale.
+	 *
+	 * On the other hand, reloading that stack without locking it feels
+	 * wrong, too, as the value of "HEAD" could be modified concurrently at
+	 * any point in time.
+	 */
+	ret = backend_for(&be, refs, "HEAD", NULL, 0);
+	if (ret)
+		goto done;
+
+	ret = read_ref_without_reload(refs, be->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1057,10 +1100,18 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	for (i = 0; i < transaction->nr; i++) {
 		struct ref_update *u = transaction->updates[i];
 		struct object_id current_oid = {0};
-		struct reftable_stack *stack;
 		const char *rewritten_ref;
 
-		stack = backend_for(refs, u->refname, &rewritten_ref)->stack;
+		/*
+		 * There is no need to reload the respective backends here as
+		 * we have already reloaded them when preparing the transaction
+		 * update. And given that the stacks have been locked there
+		 * shouldn't have been any concurrent modifications of the
+		 * stack.
+		 */
+		ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0);
+		if (ret)
+			goto done;
 
 		/* Verify that the new object ID is valid. */
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1116,7 +1167,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, stack, rewritten_ref,
+		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
 					      &current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
@@ -1318,7 +1369,7 @@ static int transaction_update_cmp(const void *a, const void *b)
 static int write_transaction_table(struct reftable_writer *writer, void *cb_data)
 {
 	struct write_transaction_table_arg *arg = cb_data;
-	uint64_t ts = reftable_stack_next_update_index(arg->stack);
+	uint64_t ts = reftable_stack_next_update_index(arg->be->stack);
 	struct reftable_log_record *logs = NULL;
 	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -1354,7 +1405,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 			struct reftable_log_record log = {0};
 			struct reftable_iterator it = {0};
 
-			ret = reftable_stack_init_log_iterator(arg->stack, &it);
+			ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 			if (ret < 0)
 				goto done;
 
@@ -1799,10 +1850,9 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1814,10 +1864,11 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1831,10 +1882,9 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1845,10 +1895,11 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -2012,15 +2063,23 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	/*
+	 * TODO: we should adapt this callsite to reload the stack. There is no
+	 * obvious reason why we shouldn't.
+	 */
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2052,16 +2111,24 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	size_t logs_alloc = 0, logs_nr = 0, i;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	/*
+	 * TODO: we should adapt this callsite to reload the stack. There is no
+	 * obvious reason why we shouldn't.
+	 */
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2101,20 +2168,20 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	ret = refs->err;
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2186,10 +2253,9 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_existence_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
@@ -2198,11 +2264,12 @@ static int reftable_be_create_reflog(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		goto done;
+	arg.stack = be->stack;
 
-	ret = reftable_stack_add(stack, &write_reflog_existence_table, &arg);
+	ret = reftable_stack_add(be->stack, &write_reflog_existence_table, &arg);
 
 done:
 	return ret;
@@ -2260,17 +2327,18 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
+	struct reftable_backend *be;
 	struct write_reflog_delete_arg arg = {
-		.stack = stack,
 		.refname = refname,
 	};
 	int ret;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
-	ret = reftable_stack_add(stack, &write_reflog_delete_table, &arg);
+	arg.stack = be->stack;
+
+	ret = reftable_stack_add(be->stack, &write_reflog_delete_table, &arg);
 
 	assert(ret != REFTABLE_API_ERROR);
 	return ret;
@@ -2369,13 +2437,13 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	 */
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
 	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
+	struct reftable_backend *be;
 	struct object_id oid = {0};
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -2384,11 +2452,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2396,11 +2464,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_new_addition(&add, stack, 0);
+	ret = reftable_stack_new_addition(&add, be->stack, 0);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref_record);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
 	if (ret < 0)
 		goto done;
 	if (reftable_ref_record_val1(&ref_record))
@@ -2479,8 +2547,8 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	arg.refs = refs;
 	arg.records = rewritten;
 	arg.len = logs_nr;
-	arg.stack = stack,
-	arg.refname = refname,
+	arg.stack = be->stack;
+	arg.refname = refname;
 
 	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);
 	if (ret < 0)

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 03/10] reftable/stack: add accessor for the hash ID
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 01/10] refs/reftable: encapsulate reftable stack Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 02/10] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 04/10] refs/reftable: figure out hash via `reftable_stack` Patrick Steinhardt
                     ` (6 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Add an accessor function that exposes the hash ID of a reftable stack
to callers. This function will be used in a subsequent commit.
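
For illustration, this is roughly how a subsequent commit in this
series maps the reftable hash ID onto Git's own hash ID via the new
accessor (a minimal sketch; `stack` stands in for whatever stack the
caller holds):

    switch (reftable_stack_hash_id(stack)) {
    case REFTABLE_HASH_SHA1:
            hash_id = GIT_HASH_SHA1;
            break;
    case REFTABLE_HASH_SHA256:
            hash_id = GIT_HASH_SHA256;
            break;
    default:
            BUG("unhandled hash ID %d", reftable_stack_hash_id(stack));
    }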

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reftable-stack.h | 3 +++
 reftable/stack.c          | 5 +++++
 2 files changed, 8 insertions(+)

diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h
index 54787f2ef530406a7970db058c3a0cf456897978..ae14270ea74108cd4c314ec38e7d5c9a4e731481 100644
--- a/reftable/reftable-stack.h
+++ b/reftable/reftable-stack.h
@@ -149,4 +149,7 @@ struct reftable_compaction_stats {
 struct reftable_compaction_stats *
 reftable_stack_compaction_stats(struct reftable_stack *st);
 
+/* Return the hash of the stack. */
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st);
+
 #endif
diff --git a/reftable/stack.c b/reftable/stack.c
index 1fffd75630266c0d3d1e4a2d037b5cf90455529d..d97b64a40d4ad05cfd9e6f33e8ba1e713281ef6d 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -1791,3 +1791,8 @@ int reftable_stack_clean(struct reftable_stack *st)
 	reftable_addition_destroy(add);
 	return err;
 }
+
+enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st)
+{
+	return reftable_merged_table_hash_id(st->merged);
+}

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 04/10] refs/reftable: figure out hash via `reftable_stack`
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (2 preceding siblings ...)
  2024-11-26  6:42   ` [PATCH v4 03/10] reftable/stack: add accessor for the hash ID Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 05/10] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
                     ` (5 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

The function `read_ref_without_reload()` accepts a ref store as input
only so that we can figure out the hash function it uses. This
information is redundant though, as the reftable stack knows about its
hash function, too.

Drop the superfluous parameter to simplify the calling convention a bit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 8be9cc43c990212b0b4f8d0b473f84270ff4787d..302de622d44ccbe8eceee0654f11ad7184791eba 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -243,8 +243,7 @@ static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_ref_store *refs,
-				   struct reftable_stack *stack,
+static int read_ref_without_reload(struct reftable_stack *stack,
 				   const char *refname,
 				   struct object_id *oid,
 				   struct strbuf *referent,
@@ -262,8 +261,21 @@ static int read_ref_without_reload(struct reftable_ref_store *refs,
 		strbuf_addstr(referent, ref.value.symref);
 		*type |= REF_ISSYMREF;
 	} else if (reftable_ref_record_val1(&ref)) {
+		unsigned int hash_id;
+
+		switch (reftable_stack_hash_id(stack)) {
+		case REFTABLE_HASH_SHA1:
+			hash_id = GIT_HASH_SHA1;
+			break;
+		case REFTABLE_HASH_SHA256:
+			hash_id = GIT_HASH_SHA256;
+			break;
+		default:
+			BUG("unhandled hash ID %d", reftable_stack_hash_id(stack));
+		}
+
 		oidread(oid, reftable_ref_record_val1(&ref),
-			refs->base.repo->hash_algo);
+			&hash_algos[hash_id]);
 	} else {
 		/* We got a tombstone, which should not happen. */
 		BUG("unhandled reference value type %d", ref.value_type);
@@ -855,7 +867,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
+	ret = read_ref_without_reload(be->stack, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1091,7 +1103,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(refs, be->stack, "HEAD",
+	ret = read_ref_without_reload(be->stack, "HEAD",
 				      &head_oid, &head_referent, &head_type);
 	if (ret < 0)
 		goto done;
@@ -1167,7 +1179,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
+		ret = read_ref_without_reload(be->stack, rewritten_ref,
 					      &current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
@@ -1733,7 +1745,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
-		ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid,
+		ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid,
 					      &head_referent, &head_type);
 		if (ret < 0)
 			goto done;

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 05/10] refs/reftable: read references via `struct reftable_backend`
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (3 preceding siblings ...)
  2024-11-26  6:42   ` [PATCH v4 04/10] refs/reftable: figure out hash via `reftable_stack` Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 06/10] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
                     ` (4 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Refactor `read_ref_without_reload()` to accept `struct reftable_backend`
as parameter instead of `struct reftable_stack`. Rename the function to
`reftable_backend_read_ref()` to clarify its scope and move it close to
other functions operating on `struct reftable_backend`.

This change allows us to implement an additional caching layer when
reading refs where we can reuse reftable iterators.
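
As a sketch, the resulting calling convention looks as follows; the
refname here is made up for illustration:

    struct strbuf referent = STRBUF_INIT;
    struct object_id oid;
    unsigned int type = 0;
    int ret;

    ret = reftable_backend_read_ref(be, "refs/heads/main",
                                    &oid, &referent, &type);
    if (ret < 0)
            ; /* an error occurred while reading */
    else if (ret > 0)
            ; /* the reference does not exist */
    else if (type & REF_ISSYMREF)
            ; /* `referent` holds the symref target */
    else
            ; /* `oid` holds the resolved object ID */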

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 122 +++++++++++++++++++++++-------------------------
 1 file changed, 59 insertions(+), 63 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 302de622d44ccbe8eceee0654f11ad7184791eba..88910207b87b1fa5bbbedebc8817f8afab937b77 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
 	be->stack = NULL;
 }
 
+static int reftable_backend_read_ref(struct reftable_backend *be,
+				     const char *refname,
+				     struct object_id *oid,
+				     struct strbuf *referent,
+				     unsigned int *type)
+{
+	struct reftable_ref_record ref = {0};
+	int ret;
+
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (ret)
+		goto done;
+
+	if (ref.value_type == REFTABLE_REF_SYMREF) {
+		strbuf_reset(referent);
+		strbuf_addstr(referent, ref.value.symref);
+		*type |= REF_ISSYMREF;
+	} else if (reftable_ref_record_val1(&ref)) {
+		unsigned int hash_id;
+
+		switch (reftable_stack_hash_id(be->stack)) {
+		case REFTABLE_HASH_SHA1:
+			hash_id = GIT_HASH_SHA1;
+			break;
+		case REFTABLE_HASH_SHA256:
+			hash_id = GIT_HASH_SHA256;
+			break;
+		default:
+			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
+		}
+
+		oidread(oid, reftable_ref_record_val1(&ref),
+			&hash_algos[hash_id]);
+	} else {
+		/* We got a tombstone, which should not happen. */
+		BUG("unhandled reference value type %d", ref.value_type);
+	}
+
+done:
+	assert(ret != REFTABLE_API_ERROR);
+	reftable_ref_record_release(&ref);
+	return ret;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -243,50 +287,6 @@ static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_stack *stack,
-				   const char *refname,
-				   struct object_id *oid,
-				   struct strbuf *referent,
-				   unsigned int *type)
-{
-	struct reftable_ref_record ref = {0};
-	int ret;
-
-	ret = reftable_stack_read_ref(stack, refname, &ref);
-	if (ret)
-		goto done;
-
-	if (ref.value_type == REFTABLE_REF_SYMREF) {
-		strbuf_reset(referent);
-		strbuf_addstr(referent, ref.value.symref);
-		*type |= REF_ISSYMREF;
-	} else if (reftable_ref_record_val1(&ref)) {
-		unsigned int hash_id;
-
-		switch (reftable_stack_hash_id(stack)) {
-		case REFTABLE_HASH_SHA1:
-			hash_id = GIT_HASH_SHA1;
-			break;
-		case REFTABLE_HASH_SHA256:
-			hash_id = GIT_HASH_SHA256;
-			break;
-		default:
-			BUG("unhandled hash ID %d", reftable_stack_hash_id(stack));
-		}
-
-		oidread(oid, reftable_ref_record_val1(&ref),
-			&hash_algos[hash_id]);
-	} else {
-		/* We got a tombstone, which should not happen. */
-		BUG("unhandled reference value type %d", ref.value_type);
-	}
-
-done:
-	assert(ret != REFTABLE_API_ERROR);
-	reftable_ref_record_release(&ref);
-	return ret;
-}
-
 static int reftable_be_config(const char *var, const char *value,
 			      const struct config_context *ctx,
 			      void *_opts)
@@ -867,7 +867,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(be->stack, refname, oid, referent, type);
+	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1103,8 +1103,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(be->stack, "HEAD",
-				      &head_oid, &head_referent, &head_type);
+	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
+					&head_referent, &head_type);
 	if (ret < 0)
 		goto done;
 	ret = 0;
@@ -1179,8 +1179,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 			string_list_insert(&affected_refnames, new_update->refname);
 		}
 
-		ret = read_ref_without_reload(be->stack, rewritten_ref,
-					      &current_oid, &referent, &u->type);
+		ret = reftable_backend_read_ref(be, rewritten_ref,
+						&current_oid, &referent, &u->type);
 		if (ret < 0)
 			goto done;
 		if (ret > 0 && !ref_update_expects_existing_old_ref(u)) {
@@ -1638,7 +1638,7 @@ struct write_create_symref_arg {
 
 struct write_copy_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	const char *oldname;
 	const char *newname;
 	const char *logmsg;
@@ -1663,7 +1663,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
 		BUG("failed splitting committer info");
 
-	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
+	if (reftable_stack_read_ref(arg->be->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
 	}
@@ -1702,7 +1702,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * the old branch and the creation of the new branch, and we cannot do
 	 * two changes to a reflog in a single update.
 	 */
-	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack);
+	deletion_ts = creation_ts = reftable_stack_next_update_index(arg->be->stack);
 	if (arg->delete_old)
 		creation_ts++;
 	reftable_writer_set_limits(writer, deletion_ts, creation_ts);
@@ -1745,8 +1745,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 		memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ);
 		logs_nr++;
 
-		ret = read_ref_without_reload(arg->stack, "HEAD", &head_oid,
-					      &head_referent, &head_type);
+		ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid,
+						&head_referent, &head_type);
 		if (ret < 0)
 			goto done;
 		append_head_reflog = (head_type & REF_ISSYMREF) && !strcmp(head_referent.buf, arg->oldname);
@@ -1789,7 +1789,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * copy over all log entries from the old reflog. Last but not least,
 	 * when renaming we also have to delete all the old reflog entries.
 	 */
-	ret = reftable_stack_init_log_iterator(arg->stack, &it);
+	ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -1862,7 +1862,6 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1876,11 +1875,10 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1894,7 +1892,6 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
 		.oldname = oldrefname,
@@ -1907,11 +1904,10 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
+	ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	arg.stack = be->stack;
-	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
+	ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 06/10] refs/reftable: refactor reading symbolic refs to use reftable backend
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (4 preceding siblings ...)
  2024-11-26  6:42   ` [PATCH v4 05/10] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 07/10] refs/reftable: refactor reflog expiry " Patrick Steinhardt
                     ` (3 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Refactor the callback function that reads symbolic references in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 88910207b87b1fa5bbbedebc8817f8afab937b77..2d06620ac8b1b7f07783cd5873dbe0fe67b84bd6 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -884,21 +884,18 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref");
-	struct reftable_ref_record ref = {0};
 	struct reftable_backend *be;
+	struct object_id oid;
+	unsigned int type = 0;
 	int ret;
 
 	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret)
 		return ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
-	if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF)
-		strbuf_addstr(referent, ref.value.symref);
-	else
+	ret = reftable_backend_read_ref(be, refname, &oid, referent, &type);
+	if (type != REF_ISSYMREF)
 		ret = -1;
-
-	reftable_ref_record_release(&ref);
 	return ret;
 }
 

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 07/10] refs/reftable: refactor reflog expiry to use reftable backend
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (5 preceding siblings ...)
  2024-11-26  6:42   ` [PATCH v4 06/10] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:42   ` [PATCH v4 08/10] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
                     ` (2 subsequent siblings)
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Refactor the callback function that expires reflog entries in the
reftable backend to use `reftable_backend_read_ref()` instead of
accessing the reftable stack directly. This ensures that the function
will benefit from the new caching layer that we're about to introduce.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 2d06620ac8b1b7f07783cd5873dbe0fe67b84bd6..b6638d43028d11ae7621dcd4e0dbf1d3174743b7 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -2444,14 +2444,15 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire");
 	struct reftable_log_record *logs = NULL;
 	struct reftable_log_record *rewritten = NULL;
-	struct reftable_ref_record ref_record = {0};
 	struct reftable_iterator it = {0};
 	struct reftable_addition *add = NULL;
 	struct reflog_expiry_arg arg = {0};
 	struct reftable_backend *be;
 	struct object_id oid = {0};
+	struct strbuf referent = STRBUF_INIT;
 	uint8_t *last_hash = NULL;
 	size_t logs_nr = 0, logs_alloc = 0, i;
+	unsigned int type = 0;
 	int ret;
 
 	if (refs->err < 0)
@@ -2473,12 +2474,9 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
+	ret = reftable_backend_read_ref(be, refname, &oid, &referent, &type);
 	if (ret < 0)
 		goto done;
-	if (reftable_ref_record_val1(&ref_record))
-		oidread(&oid, reftable_ref_record_val1(&ref_record),
-			ref_store->repo->hash_algo);
 	prepare_fn(refname, &oid, policy_cb_data);
 
 	while (1) {
@@ -2545,8 +2543,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		}
 	}
 
-	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash &&
-	    reftable_ref_record_val1(&ref_record))
+	if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash && !is_null_oid(&oid))
 		oidread(&arg.update_oid, last_hash, ref_store->repo->hash_algo);
 
 	arg.refs = refs;
@@ -2571,11 +2568,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 		cleanup_fn(policy_cb_data);
 	assert(ret != REFTABLE_API_ERROR);
 
-	reftable_ref_record_release(&ref_record);
 	reftable_iterator_destroy(&it);
 	reftable_addition_destroy(add);
 	for (i = 0; i < logs_nr; i++)
 		reftable_log_record_release(&logs[i]);
+	strbuf_release(&referent);
 	free(logs);
 	free(rewritten);
 	return ret;

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 08/10] reftable/stack: add mechanism to notify callers on reload
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (6 preceding siblings ...)
  2024-11-26  6:42   ` [PATCH v4 07/10] refs/reftable: refactor reflog expiry " Patrick Steinhardt
@ 2024-11-26  6:42   ` Patrick Steinhardt
  2024-11-26  6:43   ` [PATCH v4 09/10] reftable/merged: drain priority queue on reseek Patrick Steinhardt
  2024-11-26  6:43   ` [PATCH v4 10/10] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:42 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

Reftable stacks are reloaded in two cases:

  - When calling `reftable_stack_reload()`, if the stat-cache tells us
    that the stack has been modified.

  - When committing a reftable addition.

While callers can detect the second case themselves, they have no
mechanism to figure out whether `reftable_stack_reload()` led to an
actual reload of the on-disk data. Their only option is thus to assume
that the data is reloaded every time.

Improve the situation by introducing a new `on_reload()` callback to the
reftable options. If provided, the function will be invoked every time
the stack has indeed been reloaded. This allows callers to invalidate
data that depends on the current stack data.
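
In sketch form, a caller wires this up via the write options like so,
where `invalidate_cache()` and `cache` are hypothetical stand-ins for
whatever state the caller wants to drop:

    static void on_stack_reload(void *payload)
    {
            /* Drop any state derived from the old stack's data. */
            invalidate_cache(payload);
    }

    /* ... when setting up the stack ... */
    struct reftable_write_options opts = { 0 };
    opts.on_reload = on_stack_reload;
    opts.on_reload_payload = cache;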

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/reftable-writer.h | 9 +++++++++
 reftable/stack.c           | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h
index c85ef5a5bd14595d75f99457fef4407040e197c5..5f9afa620bb00de66c311765fb0ae8c6f56401ae 100644
--- a/reftable/reftable-writer.h
+++ b/reftable/reftable-writer.h
@@ -68,6 +68,15 @@ struct reftable_write_options {
 	 * fsync(3P) when unset.
 	 */
 	int (*fsync)(int fd);
+
+	/*
+	 * Callback function to execute whenever the stack is being reloaded.
+	 * This can be used e.g. to discard cached information that relies on
+	 * the old stack's data. The payload data will be passed as argument to
+	 * the callback.
+	 */
+	void (*on_reload)(void *payload);
+	void *on_reload_payload;
 };
 
 /* reftable_block_stats holds statistics for a single block type */
diff --git a/reftable/stack.c b/reftable/stack.c
index d97b64a40d4ad05cfd9e6f33e8ba1e713281ef6d..5384ca9de0d1f064aebcb09308a74cc397b37463 100644
--- a/reftable/stack.c
+++ b/reftable/stack.c
@@ -548,6 +548,10 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st,
 		close(fd);
 	free_names(names);
 	free_names(names_after);
+
+	if (st->opts.on_reload)
+		st->opts.on_reload(st->opts.on_reload_payload);
+
 	return err;
 }
 

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 09/10] reftable/merged: drain priority queue on reseek
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (7 preceding siblings ...)
  2024-11-26  6:42   ` [PATCH v4 08/10] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
@ 2024-11-26  6:43   ` Patrick Steinhardt
  2024-11-26  6:43   ` [PATCH v4 10/10] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:43 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

In 5bf96e0c39 (reftable/generic: move seeking of records into the
iterator, 2024-05-13) we have refactored the reftable codebase such that
iterators can be initialized once and then re-seeked multiple times.
This feature is used by 1869525066 (refs/reftable: wire up support for
exclude patterns, 2024-09-16) in order to skip records based on exclude
patterns provided by the caller.

The logic to re-seek the merged iterator is insufficient though because
we don't drain the priority queue on a re-seek. This means that the
queue may contain stale entries and thus reading the next record in the
queue will return the wrong entry. While this is an obvious bug, it is
harmless in the context of the above exclude patterns:

  - If the queue contained stale entries that match the pattern then the
    caller would already know to filter out such refs. This is because
    our codebase is prepared to handle backends that don't have a way to
    efficiently implement exclude patterns.

  - If the queue contained stale entries that don't match the pattern
    we'd eventually filter out any duplicates. This is because the
    reftable code discards items with the same ref name and sorts any
    remaining entries properly.

So things happen to work in this context regardless of the bug, and
there is no other use case yet where we re-seek iterators. We're about
to introduce a caching mechanism though where iterators are reused by
the reftable backend, and that will expose the bug.

Fix the issue by draining the priority queue when seeking and add a
testcase that surfaces the issue.
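
In sketch form, the pattern that the new testcase exercises looks like
this, with one table holding {"a", "c"} and a second one holding
{"b", "d"}:

    reftable_iterator_seek_ref(&it, "b");
    reftable_iterator_next_ref(&it, &rec);    /* yields "b" */

    /*
     * Re-seek backwards. Any records still queued up from the first
     * seek are stale now and must not leak into this iteration.
     */
    reftable_iterator_seek_ref(&it, "a");
    reftable_iterator_next_ref(&it, &rec);    /* must yield "a" */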

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 reftable/merged.c                |  2 ++
 t/unit-tests/t-reftable-merged.c | 73 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+)

diff --git a/reftable/merged.c b/reftable/merged.c
index 5b93e20f42945300abbc1a036bbdf067fced7854..bb0836e3443271f9c0d5ba5582c78694d437ddc2 100644
--- a/reftable/merged.c
+++ b/reftable/merged.c
@@ -66,6 +66,8 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want
 	int err;
 
 	mi->advance_index = -1;
+	while (!merged_iter_pqueue_is_empty(mi->pq))
+		merged_iter_pqueue_remove(&mi->pq);
 
 	for (size_t i = 0; i < mi->subiters_len; i++) {
 		err = iterator_seek(&mi->subiters[i].iter, want);
diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c
index 2591b5e59745536a205271491f747875e04c5a3f..a12bd0e1a3bdeda82bcbc6259e679df4f232e3e2 100644
--- a/t/unit-tests/t-reftable-merged.c
+++ b/t/unit-tests/t-reftable-merged.c
@@ -273,6 +273,78 @@ static void t_merged_seek_multiple_times(void)
 	reftable_free(sources);
 }
 
+static void t_merged_seek_multiple_times_without_draining(void)
+{
+	struct reftable_ref_record r1[] = {
+		{
+			.refname = (char *) "a",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 1 },
+		},
+		{
+			.refname = (char *) "c",
+			.update_index = 1,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 2 },
+		}
+	};
+	struct reftable_ref_record r2[] = {
+		{
+			.refname = (char *) "b",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 3 },
+		},
+		{
+			.refname = (char *) "d",
+			.update_index = 2,
+			.value_type = REFTABLE_REF_VAL1,
+			.value.val1 = { 4 },
+		},
+	};
+	struct reftable_ref_record *refs[] = {
+		r1, r2,
+	};
+	size_t sizes[] = {
+		ARRAY_SIZE(r1), ARRAY_SIZE(r2),
+	};
+	struct reftable_buf bufs[] = {
+		REFTABLE_BUF_INIT, REFTABLE_BUF_INIT,
+	};
+	struct reftable_block_source *sources = NULL;
+	struct reftable_reader **readers = NULL;
+	struct reftable_ref_record rec = { 0 };
+	struct reftable_iterator it = { 0 };
+	struct reftable_merged_table *mt;
+	int err;
+
+	mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2);
+	merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);
+
+	err = reftable_iterator_seek_ref(&it, "b");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1);
+	check(err == 1);
+
+	err = reftable_iterator_seek_ref(&it, "a");
+	check(!err);
+	err = reftable_iterator_next_ref(&it, &rec);
+	check(!err);
+	err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1);
+	check(err == 1);
+
+	for (size_t i = 0; i < ARRAY_SIZE(bufs); i++)
+		reftable_buf_release(&bufs[i]);
+	readers_destroy(readers, ARRAY_SIZE(refs));
+	reftable_ref_record_release(&rec);
+	reftable_iterator_destroy(&it);
+	reftable_merged_table_free(mt);
+	reftable_free(sources);
+}
+
 static struct reftable_merged_table *
 merged_table_from_log_records(struct reftable_log_record **logs,
 			      struct reftable_block_source **source,
@@ -467,6 +539,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED)
 	TEST(t_merged_logs(), "merged table with multiple log updates for same ref");
 	TEST(t_merged_refs(), "merged table with multiple updates to same ref");
 	TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times");
+	TEST(t_merged_seek_multiple_times_without_draining(), "merged table can seek multiple times without draining");
 	TEST(t_merged_single_record(), "ref occurring in only one record can be fetched");
 
 	return test_done();

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [PATCH v4 10/10] refs/reftable: reuse iterators when reading refs
  2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
                     ` (8 preceding siblings ...)
  2024-11-26  6:43   ` [PATCH v4 09/10] reftable/merged: drain priority queue on reseek Patrick Steinhardt
@ 2024-11-26  6:43   ` Patrick Steinhardt
  9 siblings, 0 replies; 57+ messages in thread
From: Patrick Steinhardt @ 2024-11-26  6:43 UTC (permalink / raw)
  To: git; +Cc: karthik nayak, Junio C Hamano

When reading references the reftable backend has to:

  1. Create a new ref iterator.

  2. Seek the iterator to the record we're searching for.

  3. Read the record.

We cannot really avoid the last two steps, but re-creating the iterator
every single time we want to read a reference is expensive and a waste
of resources. We could not avoid this in the past though, because it
was not possible to reuse iterators. But starting with 5bf96e0c39
(reftable/generic: move seeking of records into the iterator,
2024-05-13) we have split up the iterator lifecycle such that creating
the iterator and seeking are two different concerns.

Refactor the code such that we cache iterators in the reftable backend.
This cache is invalidated whenever the respective stack is reloaded, so
that we know to recreate the iterator in that case. This leads to a
sizeable speedup when creating many refs, which requires a lot of random
reference reads:

    Benchmark 1: update-ref: create many refs (refcount = 100000, revision = master)
      Time (mean ± σ):      1.793 s ±  0.010 s    [User: 0.954 s, System: 0.835 s]
      Range (min … max):    1.781 s …  1.811 s    10 runs

    Benchmark 2: update-ref: create many refs (refcount = 100000, revision = HEAD)
      Time (mean ± σ):      1.680 s ±  0.013 s    [User: 0.846 s, System: 0.831 s]
      Range (min … max):    1.664 s …  1.702 s    10 runs

    Summary
      update-ref: create many refs (refcount = 100000, revision = HEAD) ran
        1.07 ± 0.01 times faster than update-ref: create many refs (refcount = 100000, revision = master)

While 7% is not a huge win, you have to consider that the benchmark is
_writing_ data, so _reading_ references is only one part of what we do.
Flame graphs show that we spend around 40% of our time reading refs, so
the speedup when reading refs is roughly 2.5x that. I could not
find better benchmarks where we perform a lot of random ref reads.

You can also see a sizeable impact on memory usage when creating 100k
references. Before this change:

    HEAP SUMMARY:
        in use at exit: 19,112,538 bytes in 200,170 blocks
      total heap usage: 8,400,426 allocs, 8,200,256 frees, 454,367,048 bytes allocated

After this change:

    HEAP SUMMARY:
        in use at exit: 674,416 bytes in 169 blocks
      total heap usage: 7,929,872 allocs, 7,929,703 frees, 281,509,985 bytes allocated

As an additional factor, this refactoring opens up the possibility for
more performance optimizations in how we re-seek iterators. Any change
that allows us to optimize re-seeking by e.g. reusing data structures
would thus also directly speed up random reads.
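
Putting the pieces of this series together, the cached-read flow now
roughly works as in the following sketch:

    /* The first read after a (re)load lazily creates the iterator. */
    if (!be->it.ops) {
            ret = reftable_stack_init_ref_iterator(be->stack, &be->it);
            if (ret)
                    goto done;
    }

    /* Subsequent reads merely re-seek the cached iterator. */
    ret = reftable_iterator_seek_ref(&be->it, refname);

    /*
     * When the stack reloads, the `on_reload()` callback introduced
     * earlier destroys the stale iterator, and the next read
     * re-creates it from the new stack data.
     */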

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 refs/reftable-backend.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index b6638d43028d11ae7621dcd4e0dbf1d3174743b7..a80c334f384acd0d72a1c4ef9e59c93600dd6db4 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -36,19 +36,30 @@
 
 struct reftable_backend {
 	struct reftable_stack *stack;
+	struct reftable_iterator it;
 };
 
+static void reftable_backend_on_reload(void *payload)
+{
+	struct reftable_backend *be = payload;
+	reftable_iterator_destroy(&be->it);
+}
+
 static int reftable_backend_init(struct reftable_backend *be,
 				 const char *path,
-				 const struct reftable_write_options *opts)
+				 const struct reftable_write_options *_opts)
 {
-	return reftable_new_stack(&be->stack, path, opts);
+	struct reftable_write_options opts = *_opts;
+	opts.on_reload = reftable_backend_on_reload;
+	opts.on_reload_payload = be;
+	return reftable_new_stack(&be->stack, path, &opts);
 }
 
 static void reftable_backend_release(struct reftable_backend *be)
 {
 	reftable_stack_destroy(be->stack);
 	be->stack = NULL;
+	reftable_iterator_destroy(&be->it);
 }
 
 static int reftable_backend_read_ref(struct reftable_backend *be,
@@ -60,10 +71,25 @@ static int reftable_backend_read_ref(struct reftable_backend *be,
 	struct reftable_ref_record ref = {0};
 	int ret;
 
-	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (!be->it.ops) {
+		ret = reftable_stack_init_ref_iterator(be->stack, &be->it);
+		if (ret)
+			goto done;
+	}
+
+	ret = reftable_iterator_seek_ref(&be->it, refname);
 	if (ret)
 		goto done;
 
+	ret = reftable_iterator_next_ref(&be->it, &ref);
+	if (ret)
+		goto done;
+
+	if (strcmp(ref.refname, refname)) {
+		ret = 1;
+		goto done;
+	}
+
 	if (ref.value_type == REFTABLE_REF_SYMREF) {
 		strbuf_reset(referent);
 		strbuf_addstr(referent, ref.value.symref);

-- 
2.47.0.366.gd4f858ca17.dirty


^ permalink raw reply related	[flat|nested] 57+ messages in thread

end of thread, other threads:[~2024-11-26  6:43 UTC | newest]

Thread overview: 57+ messages
-- links below jump to the message on this page --
2024-11-04 15:11 [PATCH 0/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
2024-11-04 15:11 ` [PATCH 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
2024-11-05 11:03   ` karthik nayak
2024-11-04 15:11 ` [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
2024-11-05 11:14   ` karthik nayak
2024-11-06 10:43     ` Patrick Steinhardt
2024-11-04 15:11 ` [PATCH 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
2024-11-05 11:20   ` karthik nayak
2024-11-04 15:11 ` [PATCH 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
2024-11-04 15:11 ` [PATCH 5/8] refs/reftable: refactor reflog expiry " Patrick Steinhardt
2024-11-04 15:11 ` [PATCH 6/8] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
2024-11-04 15:11 ` [PATCH 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
2024-11-05  3:16   ` Junio C Hamano
2024-11-05  3:23     ` Junio C Hamano
2024-11-05  7:14       ` Patrick Steinhardt
2024-11-04 15:11 ` [PATCH 8/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
2024-11-05  4:49 ` [PATCH 0/8] " Junio C Hamano
2024-11-05  9:11 ` [PATCH v2 " Patrick Steinhardt
2024-11-05  9:11   ` [PATCH v2 1/8] refs/reftable: encapsulate reftable stack Patrick Steinhardt
2024-11-12  6:07     ` Junio C Hamano
2024-11-05  9:12   ` [PATCH v2 2/8] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
2024-11-12  6:41     ` Junio C Hamano
2024-11-12  9:05       ` Patrick Steinhardt
2024-11-05  9:12   ` [PATCH v2 3/8] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
2024-11-12  7:26     ` Junio C Hamano
2024-11-12  9:05       ` Patrick Steinhardt
2024-11-05  9:12   ` [PATCH v2 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
2024-11-05  9:12   ` [PATCH v2 5/8] refs/reftable: refactor reflog expiry " Patrick Steinhardt
2024-11-05  9:12   ` [PATCH v2 6/8] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
2024-11-05  9:12   ` [PATCH v2 7/8] reftable/merged: drain priority queue on reseek Patrick Steinhardt
2024-11-05  9:12   ` [PATCH v2 8/8] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
2024-11-25  7:38 ` [PATCH v3 0/9] " Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 1/9] refs/reftable: encapsulate reftable stack Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 2/9] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
2024-11-26  0:31     ` Junio C Hamano
2024-11-25  7:38   ` [PATCH v3 3/9] reftable/stack: add accessor for the hash ID Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 4/9] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
2024-11-26  0:48     ` Junio C Hamano
2024-11-26  6:41       ` Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 5/9] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 6/9] refs/reftable: refactor reflog expiry " Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 7/9] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 8/9] reftable/merged: drain priority queue on reseek Patrick Steinhardt
2024-11-25  7:38   ` [PATCH v3 9/9] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
2024-11-25  9:47   ` [PATCH v3 0/9] " Christian Couder
2024-11-25  9:52     ` Patrick Steinhardt
2024-11-26  6:42 ` [PATCH v4 00/10] " Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 01/10] refs/reftable: encapsulate reftable stack Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 02/10] refs/reftable: handle reloading stacks in the reftable backend Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 03/10] reftable/stack: add accessor for the hash ID Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 04/10] refs/reftable: figure out hash via `reftable_stack` Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 05/10] refs/reftable: read references via `struct reftable_backend` Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 06/10] refs/reftable: refactor reading symbolic refs to use reftable backend Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 07/10] refs/reftable: refactor reflog expiry " Patrick Steinhardt
2024-11-26  6:42   ` [PATCH v4 08/10] reftable/stack: add mechanism to notify callers on reload Patrick Steinhardt
2024-11-26  6:43   ` [PATCH v4 09/10] reftable/merged: drain priority queue on reseek Patrick Steinhardt
2024-11-26  6:43   ` [PATCH v4 10/10] refs/reftable: reuse iterators when reading refs Patrick Steinhardt
