Git development
 help / color / mirror / Atom feed
* Re: [PATCH 1/2] t3404: add failing branch symref test
From: Phillip Wood @ 2026-06-01 13:52 UTC (permalink / raw)
  To: Son Luong Ngoc via GitGitGadget, git; +Cc: Son Luong Ngoc
In-Reply-To: <a550923440a233daea0b9819e05d6c380de00d09.1779946921.git.gitgitgadget@gmail.com>

On 28/05/2026 06:42, Son Luong Ngoc via GitGitGadget wrote:
> From: Son Luong Ngoc <sluongng@gmail.com>
> 
> rebase --update-refs queues local branch decorations by their literal
> refnames. When a branch such as refs/heads/main is a symbolic ref to
> the current branch, the normal rebase path first updates the current
> branch and the queued symref update later tries to update the same
> referent with the old value it recorded before the rebase.
> 
> Add a known-breakage test that exercises this case so that the fix can
> flip it to test_expect_success. The expected behavior is that the branch
> symref keeps pointing at the rebased current branch.

Thanks for adding a test, I'd find it easier to review this series if 
the test was added in the same patch as the fix which is our usual practice.

> +test_expect_failure '--update-refs skips branch symrefs to current branch' '
> +	test_when_finished "
> +		test_might_fail git rebase --abort &&
> +		git checkout primary &&
> +		test_might_fail git symbolic-ref -d refs/heads/update-refs-symref-alias &&
> +		test_might_fail git branch -D update-refs-symref update-refs-symref-base
> +	" &&
> +	git checkout -B update-refs-symref-base primary &&
> +	test_commit --no-tag update-refs-symref-base symref-base.t &&
> +	git checkout -B update-refs-symref &&
> +	test_commit --no-tag update-refs-symref-topic symref-topic.t &&
> +	git checkout update-refs-symref-base &&
> +	test_commit --no-tag update-refs-symref-newbase symref-newbase.t &&
> +	git checkout update-refs-symref &&
> +	git symbolic-ref refs/heads/update-refs-symref-alias refs/heads/update-refs-symref &&

I think we want to test a symref that does not match HEAD as well. 
Rather than adding a new test, can we instead add a couple of symref 
branches to the test "--update-refs updates refs correctly"?

Thanks

Phillip

> +
> +	git rebase --update-refs update-refs-symref-base 2>err &&
> +
> +	test_cmp_rev update-refs-symref-base update-refs-symref^ &&
> +	test_cmp_rev refs/heads/update-refs-symref refs/heads/update-refs-symref-alias &&
> +	test_write_lines refs/heads/update-refs-symref >expect &&
> +	git symbolic-ref refs/heads/update-refs-symref-alias >actual &&
> +	test_cmp expect actual
> +'
> +
>   test_expect_success '--update-refs updates refs correctly' '
>   	git checkout -B update-refs no-conflict-branch &&
>   	git branch -f base HEAD~4 &&


^ permalink raw reply

* [PATCH v3 2/2] http: fix memory leak in fetch_and_setup_pack_index()
From: LorenzoPegorari @ 2026-06-01 13:52 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Junio C Hamano, Patrick Steinhardt, fox, Jeff King
In-Reply-To: <cover.1780321770.git.lorenzo.pegorari2002@gmail.com>

Inside the function `fetch_and_setup_pack_index()`, when the pack
obtained using `parse_pack_index()` fails to be verified by
`verify_pack_index()`, the function returns without closing and freeing
said pack.

Fix this by calling `close_pack_index()` to munmap the index file for
the leaking pack (which might have been mmapped by `fetch_pack_index()`
or `verify_pack_index()`), and then free it, when the verification
fails.

Signed-off-by: LorenzoPegorari <lorenzo.pegorari2002@gmail.com>
---
 http.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/http.c b/http.c
index b8443b1ef4..99da4d7529 100644
--- a/http.c
+++ b/http.c
@@ -2543,11 +2543,13 @@ static int fetch_and_setup_pack_index(struct packfile_list *packs,
 	}
 
 	ret = verify_pack_index(new_pack);
-	if (!ret)
-		close_pack_index(new_pack);
+
+	close_pack_index(new_pack);
 	free(tmp_idx);
-	if (ret)
+	if (ret) {
+		free(new_pack);
 		return -1;
+	}
 
 	packfile_list_prepend(packs, new_pack);
 	return 0;
-- 
2.54.0.129.g2dffd77b94.dirty


^ permalink raw reply related

* [PATCH v3 1/2] http: cleanup function fetch_and_setup_pack_index()
From: LorenzoPegorari @ 2026-06-01 13:52 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Junio C Hamano, Patrick Steinhardt, fox, Jeff King
In-Reply-To: <cover.1780321770.git.lorenzo.pegorari2002@gmail.com>

Cleanup the function `fetch_and_setup_pack_index()` by removing the
useless call to the function `unlink()`.

This is not necessary anymore since 63aca3f7f1 (dumb-http: store
downloaded pack idx as tempfile, 2024-10-25), when `fetch_pack_index()`
started registering its return value (in this case `tmp_idx`) as a
tempfile to be deleted at process exit.

Signed-off-by: LorenzoPegorari <lorenzo.pegorari2002@gmail.com>
---
 http.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/http.c b/http.c
index 67c9c6fc60..b8443b1ef4 100644
--- a/http.c
+++ b/http.c
@@ -2538,9 +2538,7 @@ static int fetch_and_setup_pack_index(struct packfile_list *packs,
 
 	new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
 	if (!new_pack) {
-		unlink(tmp_idx);
 		free(tmp_idx);
-
 		return -1; /* parse_pack_index() already issued error message */
 	}
 
-- 
2.54.0.129.g2dffd77b94.dirty


^ permalink raw reply related

* [PATCH v3 0/2] http: fix memory leak in fetch_and_setup_pack_index()
From: LorenzoPegorari @ 2026-06-01 13:51 UTC (permalink / raw)
  To: git; +Cc: Taylor Blau, Junio C Hamano, Patrick Steinhardt, fox, Jeff King
In-Reply-To: <ahjUmMCKxREamQE-@lorenzo-VM>

Patch series that does some cleanup and fixes a memory leak present
inside the function `fetch_and_setup_pack_index()`.

LorenzoPegorari (2):
  http: cleanup function fetch_and_setup_pack_index()
  http: fix memory leak in fetch_and_setup_pack_index()

 http.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

-- 
2.54.0.129.g2dffd77b94.dirty


^ permalink raw reply

* Re: [PATCH 2/2] builtin/init-db: deprecate alias for git-init(1)
From: Phillip Wood @ 2026-06-01 13:48 UTC (permalink / raw)
  To: Patrick Steinhardt, Kristoffer Haugsbakk; +Cc: git
In-Reply-To: <ah12uk7IFxS92OR1@pks.im>



On 01/06/2026 13:10, Patrick Steinhardt wrote:
> On Mon, Jun 01, 2026 at 11:31:46AM +0200, Kristoffer Haugsbakk wrote:
>> On Mon, Jun 1, 2026, at 09:56, Patrick Steinhardt wrote:
>>> diff --git a/git.c b/git.c
>>> index a72394b599..6bf6a60360 100644
>>> --- a/git.c
>>> +++ b/git.c
>>> @@ -591,7 +591,9 @@ static struct cmd_struct commands[] = {
>>>   	{ "hook", cmd_hook, RUN_SETUP_GENTLY },
>>>   	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
>>>   	{ "init", cmd_init },
>>> +#ifndef WITH_BREAKING_CHANGES
>>>   	{ "init-db", cmd_init },
>>
>> This can be marked as deprecated.
>>
>> 	{ "init-db", cmd_init, DEPRECATED },
> 
> Ah, indeed! Added locally now, thanks.

Deprecating this command seems very sensible to me. As well as marking 
it deprecated, do we want to print a warning when it is run? I imagine 
anyone who has this command in their muscle memory is unlikely to be 
reading the man page on a regular basis so wont see the warning there.

Thanks

Phillip

> Patrick
> 


^ permalink raw reply

* Re: [PATCH v2] http: fix memory leak in fetch_and_setup_pack_index()
From: Lorenzo Pegorari @ 2026-06-01 13:34 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Taylor Blau, Junio C Hamano, Patrick Steinhardt, fox
In-Reply-To: <20260529054024.GA1104383@coredump.intra.peff.net>

On Fri, May 29, 2026 at 01:40:24AM -0400, Jeff King wrote:
> On Fri, May 29, 2026 at 01:36:59AM -0400, Jeff King wrote:
> 
> > But it _could_ be done as a preparatory patch. And the rationale for
> > doing that on its own I think is roughly:
> > 
> >   1. It is mostly doing nothing, because 63aca3f7f1 registered it as a
> >      tempfile, so it will be cleaned up at process end anyway (whether
> >      we succeed in fetching it or not).
> > 
> >   2. It is maybe a little harmful, because we are going to unlink() it
> >      now, and then later the tempfile code will try to unlink() it again
> >      (so a simultaneous fetch could have created the same file).
> 
> BTW, for (2) I wondered about going in the opposite direction. If we
> actually passed the tempfile back up, like in the patch below, then we
> could use delete_tempfile() to do the unlink (and remove it from the
> tempfile list).
> 
> And then your patch would want to similarly delete_tempfile() in its
> error path.
> 
> But I don't think it really buys us much. _If_ we were going to keep
> passing the tempfile struct up the call stack on success, then we could
> store it and call delete_tempfile() as soon as we had ran index-pack on
> it. But that's even more surgery, for again little gain (we delete our
> tempfiles a little earlier, rather than at process end).
> 
> So I'm inclined to go in the direction that shortens the code. ;)
> 
> -Peff
> 
> ---
> diff --git a/http.c b/http.c
> index ea9b16861b..e83a3857b3 100644
> --- a/http.c
> +++ b/http.c
> @@ -2546,9 +2546,10 @@ int http_fetch_ref(const char *base, struct ref *ref)
>  }
>  
>  /* Helpers for fetching packs */
> -static char *fetch_pack_index(unsigned char *hash, const char *base_url)
> +static struct tempfile *fetch_pack_index(unsigned char *hash, const char *base_url)
>  {
>  	char *url, *tmp;
> +	struct tempfile *ret;
>  	struct strbuf buf = STRBUF_INIT;
>  
>  	if (http_is_verbose)
> @@ -2575,23 +2576,24 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url)
>  	tmp = xstrfmt("%s/tmp_pack_%s.idx",
>  		      repo_get_object_directory(the_repository),
>  		      hash_to_hex(hash));
> -	register_tempfile(tmp);
> +	ret = register_tempfile(tmp);
> +	free(tmp);
>  
> -	if (http_get_file(url, tmp, NULL) != HTTP_OK) {
> +	if (http_get_file(url, ret->filename.buf, NULL) != HTTP_OK) {
>  		error("Unable to get pack index %s", url);
> -		FREE_AND_NULL(tmp);
> +		delete_tempfile(&ret);
>  	}
>  
>  	free(url);
> -	return tmp;
> +	return ret;
>  }
>  
>  static int fetch_and_setup_pack_index(struct packfile_list *packs,
>  				      unsigned char *sha1,
>  				      const char *base_url)
>  {
>  	struct packed_git *new_pack, *p;
> -	char *tmp_idx = NULL;
> +	struct tempfile *tmp_idx;
>  	int ret;
>  
>  	/*
> @@ -2607,11 +2609,9 @@ static int fetch_and_setup_pack_index(struct packfile_list *packs,
>  	if (!tmp_idx)
>  		return -1;
>  
> -	new_pack = parse_pack_index(the_repository, sha1, tmp_idx);
> +	new_pack = parse_pack_index(the_repository, sha1, tmp_idx->filename.buf);
>  	if (!new_pack) {
> -		unlink(tmp_idx);
> -		free(tmp_idx);
> -
> +		delete_tempfile(&tmp_idx);
>  		return -1; /* parse_pack_index() already issued error message */
>  	}

Yeah, I also explored the possibility (as you suggested in your first
reply to v1) of manually deleting the tempfile. In my opinion, this
isn't worth the effort, and it's complicating the code for no reason, so
in the end I opted for keeping it as simple and minimal as possible.

Thanks,

Lorenzo


^ permalink raw reply

* Re: [PATCH v2] http: fix memory leak in fetch_and_setup_pack_index()
From: Lorenzo Pegorari @ 2026-06-01 13:27 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Taylor Blau, Junio C Hamano, Patrick Steinhardt, fox
In-Reply-To: <20260529053659.GC1099450@coredump.intra.peff.net>

On Fri, May 29, 2026 at 01:36:59AM -0400, Jeff King wrote:
> On Fri, May 29, 2026 at 01:49:44AM +0200, LorenzoPegorari wrote:
> 
> > Inside the function `fetch_and_setup_pack_index()`, when the pack
> > obtained using `parse_pack_index()` fails to be verified by
> > `verify_pack_index()`, the function returns without closing and freeing
> > said pack.
> > 
> > Fix this by calling `close_pack_index()` to munmap the index file for
> > the leaking pack (which might have been mmapped by `fetch_pack_index()`
> > or `verify_pack_index()`), and then free it, when the verification
> > fails.
> > 
> > Also, do some more cleanup by removing the useless call to the function
> > `unlink()`. This is not necessary anymore since 63aca3f7f1 (dumb-http:
> > store downloaded pack idx as tempfile, 2024-10-25), when
> > `fetch_pack_index()` started registering its return value (in this case
> > `tmp_idx`) as a tempfile to be deleted at process exit.
> 
> I think the patch as-is is OK. But when I see this kind of "also, do
> this..." in a commit message it is a good time to consider whether that
> should happen in a separate patch.
> 
> Here it does not make sense to remove the unlink() afterwards; you'd
> wonder why it was not present in the cleanup added by your patch.
> 
> But it _could_ be done as a preparatory patch. And the rationale for
> doing that on its own I think is roughly:
> 
>   1. It is mostly doing nothing, because 63aca3f7f1 registered it as a
>      tempfile, so it will be cleaned up at process end anyway (whether
>      we succeed in fetching it or not).
> 
>   2. It is maybe a little harmful, because we are going to unlink() it
>      now, and then later the tempfile code will try to unlink() it again
>      (so a simultaneous fetch could have created the same file).
> 
> For something this small, though, I am OK just lumping it together.
> There are diminishing returns from polishing it further.

Yeah, this makes sense. I will separate it in 2 different patches.

> -Peff

Thanks,

Lorenzo

^ permalink raw reply

* Re: [PATCH] index-pack: retain child bases in delta cache
From: Derrick Stolee @ 2026-06-01 12:50 UTC (permalink / raw)
  To: Arijit Banerjee via GitGitGadget, git
  Cc: Ævar Arnfjörð Bjarmason, Junio C Hamano,
	Arijit Banerjee, Arijit Banerjee
In-Reply-To: <pull.2131.git.1780070763044.gitgitgadget@gmail.com>

On 5/29/2026 12:06 PM, Arijit Banerjee via GitGitGadget wrote:
> From: Arijit Banerjee <arijit@effectiveailabs.com>
> 
> When resolving a delta whose result has children of its own,
> index-pack adds the result to work_head, accounts its data in
> base_cache_used, and calls prune_base_data(). It then immediately
> frees that same data.
> 
> This bypasses the existing delta base cache policy and can force later
> descendants to reconstruct the queued base again. Let the existing
> delta_base_cache_limit pruning policy decide whether to keep or evict
> the data instead.
> 
> Signed-off-by: Arijit Banerjee <arijit@effectiveailabs.com>
> ---
>     index-pack: retain child bases in delta cache
>     
>     Speed up the local pack indexing phase of clone/fetch for large
>     delta-compressed packs by keeping reconstructed delta bases available
>     for reuse when they are queued for later delta resolution.
>     
>     When index-pack reconstructs a child base and queues it for resolving
>     descendant deltas, it currently frees that data immediately. This can
>     force the same base to be reconstructed again. Instead, keep it in the
>     existing delta base cache and let the existing delta_base_cache_limit
>     policy decide whether to retain or evict it.
>     
>     This does not add a new cache or increase the cache limit. The object
>     data is already accounted in base_cache_used, and prune_base_data() is
>     already called at this point.
>     
>     Correctness:
>     
>      * t/t5302-pack-index.sh passed all 36 tests.

Is there any chance that you ran this also with SANITIZE=leak to make
sure that we aren't introducing a memory leak? (It's hard to tell just
from the patch context, though your description is convincing.)

> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-2131%2Farijit91%2Findex-pack-retain-child-base-v1
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-2131/arijit91/index-pack-retain-child-base-v1
> Pull-Request: https://github.com/gitgitgadget/git/pull/2131

Indeed, this PR has a passing linux-leaks build that exercises this
test script. [1]

[1] https://github.com/gitgitgadget/git/actions/runs/26605615549/job/78399938323?pr=2131#step:9:1405

>     Benchmarks on a quiet Ubuntu 24.04 VM, 16 vCPU, 32 GiB RAM, local SSD:
>     
>     pack baseline patched wall-time change RSS change linux blobless 69.17s
>     57.98s 16.2% faster -0.0% linux full 280.72s 236.32s 15.8% faster +1.9%
>     
>     Five-repeat public-repo medians also improved: git.git 13.1%, libgit2
>     14.0%, redis 13.5%, cpython 4.8%.
>     
>     Perf on the linux blobless pack showed the same direction under
>     profiling: 76.64s baseline vs 61.09s patched, with similar RSS.
A lot of this information that is in your cover letter would be helpful
to include in your commit message, for posterity.

Also, I prefer to see performance numbers for these repos reflected in
results from our performance test suite. We have a test for this purpose,
so you could try running this from t/perf/ for your local copies of these
repos:

  GIT_PERF_LARGE_REPO=<path> ./run HEAD~1 HEAD -- p5302-pack-index.sh

And this should result in a standard comparison table that will help
present your results in a way that is familiar to Git contributors.

> @@ -1212,7 +1212,6 @@ static void *threaded_second_pass(void *data)
>  			list_add(&child->list, &work_head);
>  			base_cache_used += child->size;
>  			prune_base_data(NULL);
> -			free_base_data(child);
>  		} else if (child) {
A nice and simple change. Good find!

Thanks,
-Stolee


^ permalink raw reply

* Re: [PATCH 5/5] cache-tree: fix verify_cache() to catch non-adjacent D/F conflicts
From: Junio C Hamano @ 2026-06-01 12:33 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren
In-Reply-To: <a87bbaa84fd5dcb2a585f82c4a5dfa1572b54588.1776731171.git.gitgitgadget@gmail.com>

"Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com> writes:

> I could not find any caller in current git that both allows the index to
> get into this state and then tries to write it out without doing other
> checks beyond the verify_cache() call in cache_tree_update(), but
> verify_cache() is documented as a safety net for preventing corrupt
> trees and should actually provide that guarantee.

Oh, absolutely.  This kind of tightening is very much appreciated.

> diff --git a/cache-tree.c b/cache-tree.c
> index 7881b42aa2..f11844fe72 100644
> --- a/cache-tree.c
> +++ b/cache-tree.c
> @@ -192,22 +192,62 @@ static int verify_cache(struct index_state *istate, int flags)
>  	for (i = 0; i + 1 < istate->cache_nr; i++) {
>  		/* path/file always comes after path because of the way
>  		 * the cache is sorted.  Also path can appear only once,
> -		 * which means conflicting one would immediately follow.
> +		 * so path/file is likely the immediately following path
> +		 * but might be separated if there is e.g. a
> +		 * path-internal/... file.
>  		 */
>  		const struct cache_entry *this_ce = istate->cache[i];
>  		const struct cache_entry *next_ce = istate->cache[i + 1];
>  		const char *this_name = this_ce->name;
>  		const char *next_name = next_ce->name;
>  		int this_len = ce_namelen(this_ce);
> +		const char *conflict_name = NULL;
> +
>  		if (this_len < ce_namelen(next_ce) &&
> -		    next_name[this_len] == '/' &&
> +		    next_name[this_len] <= '/' &&
>  		    strncmp(this_name, next_name, this_len) == 0) {
> +			if (next_name[this_len] == '/') {
> +				conflict_name = next_name;
> +			} else if (next_name[this_len] < '/') {
> +				/*
> +				 * The immediately next entry shares our
> +				 * prefix but sorts before "path/" (e.g.,
> +				 * "path-internal" between "path" and
> +				 * "path/file", since '-' (0x2D) < '/'
> +				 * (0x2F)).  Binary search to find where
> +				 * "path/" would be and check for a D/F
> +				 * conflict there.
> +				 */
> +				struct cache_entry *other;
> +				struct strbuf probe = STRBUF_INIT;
> +				int pos;
> +
> +				strbuf_add(&probe, this_name, this_len);
> +				strbuf_addch(&probe, '/');
> +				pos = index_name_pos_sparse(istate,
> +							    probe.buf,
> +							    probe.len);
> +				strbuf_release(&probe);
> +
> +				if (pos < 0)
> +					pos = -pos - 1;
> +				if (pos >= (int)istate->cache_nr)
> +					continue;
> +				other = istate->cache[pos];
> +				if (ce_namelen(other) > this_len &&
> +				    other->name[this_len] == '/' &&
> +				    !strncmp(this_name, other->name, this_len))
> +					conflict_name = other->name;
> +			}
> +		}

The narrow and tall comment block is a sign that this loop is
getting too deeply nested.  I wonder if it makes it easier to follow
if we extract this new logic into a small helper function on its
own?

What the code checks and how it does so both make sense to me, though.

Thanks.

^ permalink raw reply

* Re: [PATCH 0/5] Duplicate entry hardening
From: Junio C Hamano @ 2026-06-01 12:33 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren
In-Reply-To: <pull.2096.git.1776731171.gitgitgadget@gmail.com>

"Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com> writes:

> We had some corrupt trees with duplicate entries in real world repositories,
> which triggered an assertion failure in merge-ort. Further, the corrupt tree
> creation in the third party tool would have been avoided had verify_cache()
> correctly checked for D/F conflicts. Provide fixes for both issues,
> including 3 preparatory changes for the merge-ort fix.
>
> Elijah Newren (5):
>   merge-ort: propagate callback errors from traverse_trees_wrapper()
>   merge-ort: drop unnecessary show_all_errors from collect_merge_info()
>   merge-ort: free diff pairs queue in clear_or_reinit_internal_opts()
>   merge-ort: abort merge when trees have duplicate entries
>   cache-tree: fix verify_cache() to catch non-adjacent D/F conflicts

This is a fix to an important corner of our system, but somehow left
in "Needs review" state for much longer than I would have liked, so
even though I am officially on vacation ;-), I took some time to
read these through (by the way it was a pleasant read, thank you).

I wonder if we create a rule like

    Those of you who have more than 30 commits in our project are
    expected to review one topic (or more) from other contributors
    for every three patches you send and ask for reviews by others.

it would help balance the patch vs review ratio, perhaps?


^ permalink raw reply

* Re: [PATCH 4/5] merge-ort: abort merge when trees have duplicate entries
From: Junio C Hamano @ 2026-06-01 12:23 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren
In-Reply-To: <0d81c027aafcb386398836ffc73b058b7ea4c702.1776731171.git.gitgitgadget@gmail.com>

"Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Elijah Newren <newren@gmail.com>
>
> Trees with duplicate entries are malformed; fsck reports "contains
> duplicate file entries" for them.  merge-ort has from the beginning
> assumed that we would never hit such trees.  It was written with the
> assumption that traverse_trees() calls collect_merge_info_callback() at
> most once per path.  The "sanity checks" in that callback (added in
> d2bc1994f363 (merge-ort: implement a very basic collect_merge_info(),
> 2020-12-13)) verify properties of each individual call but not that
> invariant.  The strmap_put() in setup_path_info() silently overwrites
> the entry from any prior call for the same path, because it assumed
> there would be no other path.  Unfortunately, supplemental data
> structures for various optimizations could still be tweaked before the
> extra paths were overwritten, and those data structures not matching
> expected state could trip various assertions.
>
> Change the return type of setup_path_info() from void to int to allow us
> to detect this case, and abort the merge with a clear error message when
> it occurs.

OK.

> @@ -1081,9 +1081,11 @@ static void setup_path_info(struct merge_options *opt,
>  			 */
>  			mi->is_null = 1;
>  	}
> -	strmap_put(&opt->priv->paths, fullpath, mi);
> +	if (strmap_put(&opt->priv->paths, fullpath, mi))
> +		return error(_("tree has duplicate entries for '%s'"), fullpath);

OK.  I was wondering what _other_ kind of malformed trees would the
updated code by this change is prepared to handle (most notably,
tree entries must be sorted, and one way to detect duplicate is to
remember one single path that we saw earlier, which would work as
long as the entries are sorted).  This "ah, we saw that path already"
approach is much more robust in that it does not have to depend on a
sorted tree.

Makes sense.

^ permalink raw reply

* Re: [PATCH 2/5] merge-ort: drop unnecessary show_all_errors from collect_merge_info()
From: Junio C Hamano @ 2026-06-01 12:23 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren
In-Reply-To: <949b5d8e3f3aefd9497a7b85d860259b9d5db418.1776731171.git.gitgitgadget@gmail.com>

"Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Elijah Newren <newren@gmail.com>
>
> collect_merge_info() has set info.show_all_errors = 1 since
> d2bc1994f363 (merge-ort: implement a very basic collect_merge_info(),
> 2020-12-13).  This setting was copied from unpack-trees.c where it
> controls batching of error messages for porcelain display, but
> merge-ort has no such error-batching logic and never needed it.
>
> With show_all_errors set, traverse_trees() captures a negative callback
> return but continues processing remaining entries rather than stopping
> immediately.  Removing the setting restores the default behavior where
> a negative return from collect_merge_info_callback() breaks out of the
> traversal loop right away, allowing a future commit to exit early when
> a corrupt tree is detected.

Nice spotting.  As the error handling eventually is to die without
making any further damange, returning early without seeing "more
errors" is a good change.

>
> Signed-off-by: Elijah Newren <newren@gmail.com>
> ---
>  merge-ort.c | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 4b8e32209d..74e9636020 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -1740,7 +1740,6 @@ static int collect_merge_info(struct merge_options *opt,
>  	setup_traverse_info(&info, opt->priv->toplevel_dir);
>  	info.fn = collect_merge_info_callback;
>  	info.data = opt;
> -	info.show_all_errors = 1;
>  
>  	if (repo_parse_tree(opt->repo, merge_base) < 0 ||
>  	    repo_parse_tree(opt->repo, side1) < 0 ||

^ permalink raw reply

* Re: [PATCH 1/5] merge-ort: propagate callback errors from traverse_trees_wrapper()
From: Junio C Hamano @ 2026-06-01 12:13 UTC (permalink / raw)
  To: Elijah Newren via GitGitGadget; +Cc: git, Elijah Newren
In-Reply-To: <282f906d1b4767d95e2a66072c280c2294a93a9f.1776731171.git.gitgitgadget@gmail.com>

"Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Elijah Newren <newren@gmail.com>
>
> traverse_trees_wrapper() saves entries from a first pass through
> traverse_trees() and then replays them through the real callback
> (collect_merge_info_callback).  However, the replay loop silently
> discards the callback return value.  This means any error reported by
> the callback during replay -- including a future check for malformed
> trees -- would be ignored, allowing the merge to proceed with corrupt
> state.
>
> Capture the return value, stop the loop on negative (error) returns,
> and propagate the error to the caller.  Note that the callback returns
> a positive mask value on success, so we normalize non-negative returns
> to 0 for the caller.

All makes perfect sense.

How would the externally visible behaviour change at this step?

Upon an error from the callback, we used to keep going and processed
other callback data in the renames structure.  We now leave the rest
unprocessed.

The caller of this helper would never have seen a failure, but now
they will.  Both callers, collect_merge_info_callback() and
handle_deferred_entries(), are reacting to a negative "error" return
well (perhaps because they sometimes call traverse_trees() in the
same control flow, which does return an error already), so
presumably there is no downside caused by aborting the innermost
process upon the first error return.



> Signed-off-by: Elijah Newren <newren@gmail.com>
> ---
>  merge-ort.c | 14 ++++++++------
>  1 file changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/merge-ort.c b/merge-ort.c
> index 00923ce3cd..4b8e32209d 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -1008,18 +1008,20 @@ static int traverse_trees_wrapper(struct index_state *istate,
>  	info->traverse_path = renames->callback_data_traverse_path;
>  	info->fn = old_fn;
>  	for (i = old_offset; i < renames->callback_data_nr; ++i) {
> -		info->fn(n,
> -			 renames->callback_data[i].mask,
> -			 renames->callback_data[i].dirmask,
> -			 renames->callback_data[i].names,
> -			 info);
> +		ret = info->fn(n,
> +			       renames->callback_data[i].mask,
> +			       renames->callback_data[i].dirmask,
> +			       renames->callback_data[i].names,
> +			       info);
> +		if (ret < 0)
> +			break;
>  	}
>  
>  	renames->callback_data_nr = old_offset;
>  	free(renames->callback_data_traverse_path);
>  	renames->callback_data_traverse_path = old_callback_data_traverse_path;
>  	info->traverse_path = NULL;
> -	return 0;
> +	return ret < 0 ? ret : 0;
>  }
>  
>  static void setup_path_info(struct merge_options *opt,

^ permalink raw reply

* Re: [PATCH 2/2] builtin/init-db: deprecate alias for git-init(1)
From: Patrick Steinhardt @ 2026-06-01 12:10 UTC (permalink / raw)
  To: Kristoffer Haugsbakk; +Cc: git
In-Reply-To: <276a92ac-b2cb-4a89-96d0-9071ab6200be@app.fastmail.com>

On Mon, Jun 01, 2026 at 11:31:46AM +0200, Kristoffer Haugsbakk wrote:
> On Mon, Jun 1, 2026, at 09:56, Patrick Steinhardt wrote:
> > diff --git a/git.c b/git.c
> > index a72394b599..6bf6a60360 100644
> > --- a/git.c
> > +++ b/git.c
> > @@ -591,7 +591,9 @@ static struct cmd_struct commands[] = {
> >  	{ "hook", cmd_hook, RUN_SETUP_GENTLY },
> >  	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
> >  	{ "init", cmd_init },
> > +#ifndef WITH_BREAKING_CHANGES
> >  	{ "init-db", cmd_init },
> 
> This can be marked as deprecated.
> 
> 	{ "init-db", cmd_init, DEPRECATED },

Ah, indeed! Added locally now, thanks.

Patrick

^ permalink raw reply

* Re: [PATCH v1 3/4] environment: move 'trust_executable_bit' into repo_config_values
From: Tian Yuchen @ 2026-06-01 10:10 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, christian.couder, ps, Ayush Chandekar, Olamide Caleb Bello
In-Reply-To: <xmqq7bokebct.fsf@gitster.g>

Hi Junio,

Thanks for the feedback!

On 5/31/26 07:17, Junio C Hamano wrote:
> Tian Yuchen <cat@malon.dev> writes:
> 
>> diff --git a/apply.c b/apply.c
>> index 249248d4f2..73ca9907f8 100644
>> --- a/apply.c
>> +++ b/apply.c
>> @@ -3890,10 +3890,12 @@ static int check_preimage(struct apply_state *state,
>>   	}
>>   
>>   	if (!state->cached && !previous) {
>> +		struct repo_config_values *cfg = repo_config_values(the_repository);
>> +
>>   		if (*ce && !(*ce)->ce_mode)
>>   			BUG("ce_mode == 0 for path '%s'", old_name);
>>   
>> -		if (trust_executable_bit || !S_ISREG(st->st_mode))
>> +		if (cfg->trust_executable_bit || !S_ISREG(st->st_mode))
>>   			st_mode = ce_mode_from_stat(*ce, st->st_mode);
>>   		else if (*ce)
>>   			st_mode = (*ce)->ce_mode;
>> diff --git a/read-cache.c b/read-cache.c
>> index 54150fe756..18af533649 100644
>> --- a/read-cache.c
>> +++ b/read-cache.c
>> @@ -204,10 +204,12 @@ void fill_stat_cache_info(struct index_state *istate, struct cache_entry *ce, st
>>   
>>   unsigned int ce_mode_from_stat(const struct cache_entry *ce, unsigned int mode)
>>   {
>> +	struct repo_config_values *cfg = repo_config_values(the_repository);
>> +
>>   	if (!has_symlinks && S_ISREG(mode) &&
>>   	    ce && S_ISLNK(ce->ce_mode))
>>   		return ce->ce_mode;
>> -	if (!trust_executable_bit && S_ISREG(mode)) {
>> +	if (!cfg->trust_executable_bit && S_ISREG(mode)) {
>>   		if (ce && S_ISREG(ce->ce_mode))
>>   			return ce->ce_mode;
>>   		return create_ce_mode(0666);
> 
> How hot are the code paths that call into this helper function?  In
> the original under some condition, it was possible to return without
> even consulting the trust_executable_bit variable, but in the
> updated code, the helper unconditionally makes a call to the
> repo_config_values() helper function even before it knows it needs
> to know the value of trust_executable_bit.

That sounds reasonable to me. I’ll adjust the conditional logic in some 
of the statements so that they short-circuit appropriately to avoid 
performance overhead.

> 
>> @@ -217,11 +219,13 @@ unsigned int ce_mode_from_stat(const struct cache_entry *ce, unsigned int mode)
>>   
>>   static unsigned int st_mode_from_ce(const struct cache_entry *ce)
>>   {
>> +	struct repo_config_values *cfg = repo_config_values(the_repository);
>> +
>>   	switch (ce->ce_mode & S_IFMT) {
>>   	case S_IFLNK:
>>   		return has_symlinks ? S_IFLNK : (S_IFREG | 0644);
>>   	case S_IFREG:
>> -		return (ce->ce_mode & (trust_executable_bit ? 0755 : 0644)) | S_IFREG;
>> +		return (ce->ce_mode & (cfg->trust_executable_bit ? 0755 : 0644)) | S_IFREG;
>>   	case S_IFGITLINK:
>>   		return S_IFDIR | 0755;
>>   	case S_IFDIR:
> 
> Ditto.
> 
>> @@ -321,6 +325,7 @@ static int ce_modified_check_fs(struct index_state *istate,
>>   static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
>>   {
>>   	unsigned int changed = 0;
>> +	struct repo_config_values *cfg = repo_config_values(the_repository);
>>   
>>   	if (ce->ce_flags & CE_REMOVE)
>>   		return MODE_CHANGED | DATA_CHANGED | TYPE_CHANGED;
>> @@ -331,7 +336,7 @@ static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
>>   		/* We consider only the owner x bit to be relevant for
>>   		 * "mode changes"
>>   		 */
>> -		if (trust_executable_bit &&
>> +		if (cfg->trust_executable_bit &&
>>   		    (0100 & (ce->ce_mode ^ st->st_mode)))
>>   			changed |= MODE_CHANGED;
>>   		break;
> 
> Ditto.
> 
>> @@ -732,6 +737,8 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
>>   			  (intent_only ? ADD_CACHE_NEW_ONLY : 0));
>>   	unsigned hash_flags = pretend ? 0 : INDEX_WRITE_OBJECT;
>>   
>> +	struct repo_config_values *cfg = repo_config_values(the_repository);
>> +
> 
> Lose the excess blank line before the new declaration.
> 
>>   	if (flags & ADD_CACHE_RENORMALIZE)
>>   		hash_flags |= INDEX_RENORMALIZE;
>>   
>> @@ -752,7 +759,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
>>   		ce->ce_flags |= CE_INTENT_TO_ADD;
>>   
>>   
>> -	if (trust_executable_bit && has_symlinks) {
>> +	if (cfg->trust_executable_bit && has_symlinks) {
>>   		ce->ce_mode = create_ce_mode(st_mode);
>>   	} else {
>>   		/* If there is an existing entry, pick the mode bits and type
> 
> Almost all of these places that care about trust_executable_bit also
> cares about has_symlinks.  I wonder if they should be converted to
> repo-local settings in the same series.

That’s true: I had actually planned to start migrating has_symlinks as 
soon as this series was approved. Since you think it would be better to 
merge them into a single series, I’ll go ahead and do that ;)

Thanks, yuchen


^ permalink raw reply

* Re: [PATCH 2/2] builtin/init-db: deprecate alias for git-init(1)
From: Kristoffer Haugsbakk @ 2026-06-01  9:31 UTC (permalink / raw)
  To: Patrick Steinhardt, git
In-Reply-To: <20260601-pks-deprecate-git-init-db-v1-2-ea3e6eebe674@pks.im>

On Mon, Jun 1, 2026, at 09:56, Patrick Steinhardt wrote:
> The git-init-db(1) command was initially only initializing the object
> database of a Git repository. This has changed over time so that the
> command also initializes all the other data structures, which is why we
> have eventually introduced git-init(1) as a more aptly named replacement
> for it.
>
> This has all happened in 2007 already, and with 5c94f87e6b (use 'init'
> instead of 'init-db' for shipped docs and tools, 2007-01-12) we have
> also adapted all user-facing documentation to mention the replacement.
> It is thus safe to assume that (almost) nobody uses git-init-db(1)
> nowadays anymore.
>
> Deprecate the command in favor of git-init(1) and wire up the removal
> when compiling Git with breaking changes enabled.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
>[snip]
> diff --git a/git.c b/git.c
> index a72394b599..6bf6a60360 100644
> --- a/git.c
> +++ b/git.c
> @@ -591,7 +591,9 @@ static struct cmd_struct commands[] = {
>  	{ "hook", cmd_hook, RUN_SETUP_GENTLY },
>  	{ "index-pack", cmd_index_pack, RUN_SETUP_GENTLY | NO_PARSEOPT },
>  	{ "init", cmd_init },
> +#ifndef WITH_BREAKING_CHANGES
>  	{ "init-db", cmd_init },

This can be marked as deprecated.

	{ "init-db", cmd_init, DEPRECATED },

> +#endif
>  	{ "interpret-trailers", cmd_interpret_trailers, RUN_SETUP_GENTLY },
>  	{ "last-modified", cmd_last_modified, RUN_SETUP },
>  	{ "log", cmd_log, RUN_SETUP },
>[snip]

^ permalink raw reply

* [PATCH v2 18/18] odb/source-loose: drop pointer to the "files" source
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Now that all callbacks of the loose source operate on `struct
odb_source_loose` directly we no longer have to reach into the "files"
source at all.

Drop this field and update `odb_source_loose_new()` to instead accept
all parameters required to initialize itself. This ensures that the
"loose" backend is a fully standalone source.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-files.c | 2 +-
 odb/source-loose.c | 8 ++++----
 odb/source-loose.h | 7 ++++---
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/odb/source-files.c b/odb/source-files.c
index 83f8066c67..5bdd042922 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -268,7 +268,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb,
 
 	CALLOC_ARRAY(files, 1);
 	odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local);
-	files->loose = odb_source_loose_new(files);
+	files->loose = odb_source_loose_new(odb, path, local);
 	files->packed = packfile_store_new(&files->base);
 
 	files->base.free = odb_source_files_free;
diff --git a/odb/source-loose.c b/odb/source-loose.c
index e174941318..7d7ea2fb84 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -705,14 +705,14 @@ static void odb_source_loose_free(struct odb_source *source)
 	free(loose);
 }
 
-struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
+struct odb_source_loose *odb_source_loose_new(struct object_database *odb,
+					      const char *path,
+					      bool local)
 {
 	struct odb_source_loose *loose;
 
 	CALLOC_ARRAY(loose, 1);
-	odb_source_init(&loose->base, files->base.odb, ODB_SOURCE_LOOSE,
-			files->base.path, files->base.local);
-	loose->files = files;
+	odb_source_init(&loose->base, odb, ODB_SOURCE_LOOSE, path, local);
 
 	loose->base.free = odb_source_loose_free;
 	loose->base.close = odb_source_loose_close;
diff --git a/odb/source-loose.h b/odb/source-loose.h
index 4dd4fd6ce3..6070aaf3ce 100644
--- a/odb/source-loose.h
+++ b/odb/source-loose.h
@@ -9,11 +9,10 @@ struct oidtree;
 
 /*
  * An object database source that stores its objects in loose format, one
- * file per object. This source is part of the files source.
+ * file per object.
  */
 struct odb_source_loose {
 	struct odb_source base;
-	struct odb_source_files *files;
 
 	/*
 	 * Used to store the results of readdir(3) calls when we are OK
@@ -31,7 +30,9 @@ struct odb_source_loose {
 	struct loose_object_map *map;
 };
 
-struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files);
+struct odb_source_loose *odb_source_loose_new(struct object_database *odb,
+					      const char *path,
+					      bool local);
 
 /*
  * Cast the given object database source to the loose backend. This will cause

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 17/18] odb/source-loose: stub out remaining callbacks
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Stub out remaining callback functions for the "loose" backend.

Note that we also stub out transactions for loose objects. In fact, we
already have the infrastructure in place for those, and we could in
theory implement those, as well. But there are separate efforts ongoing
to polish up transactional interfaces, and doing so now would likely
result in some messiness. This omission will thus be worked on in a
subsequent patch series, once the dust has settled.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 odb/source-loose.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/odb/source-loose.c b/odb/source-loose.c
index e52fc289a2..e174941318 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -645,6 +645,25 @@ static int odb_source_loose_write_object_stream(struct odb_source *source,
 	return odb_source_loose_write_stream(loose, in_stream, len, oid);
 }
 
+static int odb_source_loose_begin_transaction(struct odb_source *source UNUSED,
+					      struct odb_transaction **out UNUSED)
+{
+	/* TODO: this is a known omission that we'll want to address eventually. */
+	return error("loose source does not support transactions");
+}
+
+static int odb_source_loose_read_alternates(struct odb_source *source UNUSED,
+					    struct strvec *out UNUSED)
+{
+	return 0;
+}
+
+static int odb_source_loose_write_alternate(struct odb_source *source UNUSED,
+					    const char *alternate UNUSED)
+{
+	return error("loose source does not support alternates");
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -706,6 +725,9 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.freshen_object = odb_source_loose_freshen_object;
 	loose->base.write_object = odb_source_loose_write_object;
 	loose->base.write_object_stream = odb_source_loose_write_object_stream;
+	loose->base.begin_transaction = odb_source_loose_begin_transaction;
+	loose->base.read_alternates = odb_source_loose_read_alternates;
+	loose->base.write_alternate = odb_source_loose_write_alternate;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 16/18] odb/source-loose: wire up `write_object_stream()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Wire up the `write_object_stream()` callback.

Note that we don't move the implementation into "odb/source-loose.c".
This is because most of the logic to write loose objects is still
contained in "object-file.c", and detangling that requires us to do some
refactorings as explained in the preceding commit. So for now, the
implementation of writing an object stream is still located in
"object-file.c".

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.h      | 12 +++++++++++-
 odb/source-files.c |  3 ++-
 odb/source-loose.c | 14 ++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/object-file.h b/object-file.h
index d30f1b10b2..528c4e6e69 100644
--- a/object-file.h
+++ b/object-file.h
@@ -23,7 +23,17 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa
 struct object_info;
 struct odb_source;
 
-int odb_source_loose_write_stream(struct odb_source_loose *loose,
+/*
+ * Write the given stream into the loose object source. The only difference
+ * from the generic implementation of this function is that we don't perform an
+ * object existence check here.
+ *
+ * TODO: We should stop exposing this function altogether and move it into
+ * "odb/source-loose.c". This requires a couple of refactorings though to make
+ * `force_object_loose()` generic and is thus postponed to a later point in
+ * time.
+ */
+int odb_source_loose_write_stream(struct odb_source_loose *source,
 				  struct odb_write_stream *stream, size_t len,
 				  struct object_id *oid);
 
diff --git a/odb/source-files.c b/odb/source-files.c
index 2ba1def776..83f8066c67 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -7,6 +7,7 @@
 #include "odb.h"
 #include "odb/source.h"
 #include "odb/source-files.h"
+#include "odb/source-loose.h"
 #include "packfile.h"
 #include "strbuf.h"
 #include "write-or-die.h"
@@ -175,7 +176,7 @@ static int odb_source_files_write_object_stream(struct odb_source *source,
 						struct object_id *oid)
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
-	return odb_source_loose_write_stream(files->loose, stream, len, oid);
+	return odb_source_write_object_stream(&files->loose->base, stream, len, oid);
 }
 
 static int odb_source_files_begin_transaction(struct odb_source *source,
diff --git a/odb/source-loose.c b/odb/source-loose.c
index da8a60dba1..e52fc289a2 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -632,6 +632,19 @@ static int odb_source_loose_write_object(struct odb_source *source,
 	return 0;
 }
 
+static int odb_source_loose_write_object_stream(struct odb_source *source,
+						struct odb_write_stream *in_stream,
+						size_t len,
+						struct object_id *oid)
+{
+	/*
+	 * TODO: the implementation should be moved here, see the comment on
+	 * the called function in "object-file.h".
+	 */
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	return odb_source_loose_write_stream(loose, in_stream, len, oid);
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -692,6 +705,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.count_objects = odb_source_loose_count_objects;
 	loose->base.freshen_object = odb_source_loose_freshen_object;
 	loose->base.write_object = odb_source_loose_write_object;
+	loose->base.write_object_stream = odb_source_loose_write_object_stream;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 15/18] object-file: refactor writing objects to use loose source
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

The "object-file" subsystem still hosts the majority of logic used to
write loose objects. Eventually, we'll want to move this logic into
"odb/source-loose.c", but this isn't yet easily possible because a lot
of the writing logic is still being shared with `force_object_loose()`.

We will eventually detangle this logic so that we can indeed move all of
it into the "loose" source. Meanwhile though, refactor the code so that
it operates on a `struct odb_source_loose` directly to already make the
dependency explicit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 http-walker.c      |  3 ++-
 http.c             |  6 +++--
 object-file.c      | 75 +++++++++++++++++++++++++++---------------------------
 object-file.h      |  6 ++---
 odb/source-files.c |  3 ++-
 odb/source-loose.c |  9 ++++---
 6 files changed, 53 insertions(+), 49 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index 1b6d496548..435a726540 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -539,8 +539,9 @@ static int fetch_object(struct walker *walker, const struct object_id *oid)
 	} else if (!oideq(&obj_req->oid, &req->real_oid)) {
 		ret = error("File %s has bad hash", hex);
 	} else if (req->rename < 0) {
+		struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
 		struct strbuf buf = STRBUF_INIT;
-		odb_loose_path(the_repository->objects->sources, &buf, &req->oid);
+		odb_loose_path(files->loose, &buf, &req->oid);
 		ret = error("unable to write sha1 filename %s", buf.buf);
 		strbuf_release(&buf);
 	}
diff --git a/http.c b/http.c
index ea9b16861b..3fcc012233 100644
--- a/http.c
+++ b/http.c
@@ -2826,6 +2826,7 @@ static size_t fwrite_sha1_file(char *ptr, size_t eltsize, size_t nmemb,
 struct http_object_request *new_http_object_request(const char *base_url,
 						    const struct object_id *oid)
 {
+	struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
 	char *hex = oid_to_hex(oid);
 	struct strbuf filename = STRBUF_INIT;
 	struct strbuf prevfile = STRBUF_INIT;
@@ -2840,7 +2841,7 @@ struct http_object_request *new_http_object_request(const char *base_url,
 	oidcpy(&freq->oid, oid);
 	freq->localfile = -1;
 
-	odb_loose_path(the_repository->objects->sources, &filename, oid);
+	odb_loose_path(files->loose, &filename, oid);
 	strbuf_addf(&freq->tmpfile, "%s.temp", filename.buf);
 
 	strbuf_addf(&prevfile, "%s.prev", filename.buf);
@@ -2966,6 +2967,7 @@ void process_http_object_request(struct http_object_request *freq)
 
 int finish_http_object_request(struct http_object_request *freq)
 {
+	struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
 	struct stat st;
 	struct strbuf filename = STRBUF_INIT;
 
@@ -2992,7 +2994,7 @@ int finish_http_object_request(struct http_object_request *freq)
 		unlink_or_warn(freq->tmpfile.buf);
 		return -1;
 	}
-	odb_loose_path(the_repository->objects->sources, &filename, &freq->oid);
+	odb_loose_path(files->loose, &filename, &freq->oid);
 	freq->rename = finalize_object_file(the_repository, freq->tmpfile.buf, filename.buf);
 	strbuf_release(&filename);
 
diff --git a/object-file.c b/object-file.c
index 7bb5b31bca..bce941874e 100644
--- a/object-file.c
+++ b/object-file.c
@@ -54,14 +54,14 @@ static void fill_loose_path(struct strbuf *buf,
 	}
 }
 
-const char *odb_loose_path(struct odb_source *source,
+const char *odb_loose_path(struct odb_source_loose *loose,
 			   struct strbuf *buf,
 			   const struct object_id *oid)
 {
 	strbuf_reset(buf);
-	strbuf_addstr(buf, source->path);
+	strbuf_addstr(buf, loose->base.path);
 	strbuf_addch(buf, '/');
-	fill_loose_path(buf, oid, source->odb->repo->hash_algo);
+	fill_loose_path(buf, oid, loose->base.odb->repo->hash_algo);
 	return buf->buf;
 }
 
@@ -575,14 +575,14 @@ static void flush_loose_object_transaction(struct odb_transaction_files *transac
 }
 
 /* Finalize a file on disk, and close it. */
-static void close_loose_object(struct odb_source *source,
+static void close_loose_object(struct odb_source_loose *loose,
 			       int fd, const char *filename)
 {
-	if (source->will_destroy)
+	if (loose->base.will_destroy)
 		goto out;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		fsync_loose_object_transaction(source->odb->transaction, fd, filename);
+		fsync_loose_object_transaction(loose->base.odb->transaction, fd, filename);
 	else if (fsync_object_files > 0)
 		fsync_or_die(fd, filename);
 	else
@@ -651,7 +651,7 @@ static int create_tmpfile(struct repository *repo,
  * Returns a "fd", which should later be provided to
  * end_loose_object_common().
  */
-static int start_loose_object_common(struct odb_source *source,
+static int start_loose_object_common(struct odb_source_loose *loose,
 				     struct strbuf *tmp_file,
 				     const char *filename, unsigned flags,
 				     git_zstream *stream,
@@ -659,18 +659,18 @@ static int start_loose_object_common(struct odb_source *source,
 				     struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 				     char *hdr, int hdrlen)
 {
-	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
-	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
+	const struct git_hash_algo *algo = loose->base.odb->repo->hash_algo;
+	const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo;
 	int fd;
 
-	fd = create_tmpfile(source->odb->repo, tmp_file, filename);
+	fd = create_tmpfile(loose->base.odb->repo, tmp_file, filename);
 	if (fd < 0) {
 		if (flags & ODB_WRITE_OBJECT_SILENT)
 			return -1;
 		else if (errno == EACCES)
 			return error(_("insufficient permission for adding "
 				       "an object to repository database %s"),
-				     source->path);
+				     loose->base.path);
 		else
 			return error_errno(
 				_("unable to create temporary file"));
@@ -700,14 +700,14 @@ static int start_loose_object_common(struct odb_source *source,
  * Common steps for the inner git_deflate() loop for writing loose
  * objects. Returns what git_deflate() returns.
  */
-static int write_loose_object_common(struct odb_source *source,
+static int write_loose_object_common(struct odb_source_loose *loose,
 				     struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 				     git_zstream *stream, const int flush,
 				     unsigned char *in0, const int fd,
 				     unsigned char *compressed,
 				     const size_t compressed_len)
 {
-	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
+	const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo;
 	int ret;
 
 	ret = git_deflate(stream, flush ? Z_FINISH : 0);
@@ -728,12 +728,12 @@ static int write_loose_object_common(struct odb_source *source,
  * - End the compression of zlib stream.
  * - Get the calculated oid to "oid".
  */
-static int end_loose_object_common(struct odb_source *source,
+static int end_loose_object_common(struct odb_source_loose *loose,
 				   struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 				   git_zstream *stream, struct object_id *oid,
 				   struct object_id *compat_oid)
 {
-	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
+	const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo;
 	int ret;
 
 	ret = git_deflate_end_gently(stream);
@@ -746,7 +746,7 @@ static int end_loose_object_common(struct odb_source *source,
 	return Z_OK;
 }
 
-int write_loose_object(struct odb_source *source,
+int write_loose_object(struct odb_source_loose *loose,
 		       const struct object_id *oid, char *hdr,
 		       int hdrlen, const void *buf, unsigned long len,
 		       time_t mtime, unsigned flags)
@@ -760,11 +760,11 @@ int write_loose_object(struct odb_source *source,
 	static struct strbuf filename = STRBUF_INIT;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_transaction(source->odb->transaction);
+		prepare_loose_object_transaction(loose->base.odb->transaction);
 
-	odb_loose_path(source, &filename, oid);
+	odb_loose_path(loose, &filename, oid);
 
-	fd = start_loose_object_common(source, &tmp_file, filename.buf, flags,
+	fd = start_loose_object_common(loose, &tmp_file, filename.buf, flags,
 				       &stream, compressed, sizeof(compressed),
 				       &c, NULL, hdr, hdrlen);
 	if (fd < 0)
@@ -776,14 +776,14 @@ int write_loose_object(struct odb_source *source,
 	do {
 		unsigned char *in0 = stream.next_in;
 
-		ret = write_loose_object_common(source, &c, NULL, &stream, 1, in0, fd,
+		ret = write_loose_object_common(loose, &c, NULL, &stream, 1, in0, fd,
 						compressed, sizeof(compressed));
 	} while (ret == Z_OK);
 
 	if (ret != Z_STREAM_END)
 		die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
 		    ret);
-	ret = end_loose_object_common(source, &c, NULL, &stream, &parano_oid, NULL);
+	ret = end_loose_object_common(loose, &c, NULL, &stream, &parano_oid, NULL);
 	if (ret != Z_OK)
 		die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
 		    ret);
@@ -791,7 +791,7 @@ int write_loose_object(struct odb_source *source,
 		die(_("confused by unstable object source data for %s"),
 		    oid_to_hex(oid));
 
-	close_loose_object(source, fd, tmp_file.buf);
+	close_loose_object(loose, fd, tmp_file.buf);
 
 	if (mtime) {
 		struct utimbuf utb;
@@ -802,16 +802,15 @@ int write_loose_object(struct odb_source *source,
 			warning_errno(_("failed utime() on %s"), tmp_file.buf);
 	}
 
-	return finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
+	return finalize_object_file_flags(loose->base.odb->repo, tmp_file.buf, filename.buf,
 					  FOF_SKIP_COLLISION_CHECK);
 }
 
-int odb_source_loose_write_stream(struct odb_source *source,
+int odb_source_loose_write_stream(struct odb_source_loose *loose,
 				  struct odb_write_stream *in_stream, size_t len,
 				  struct object_id *oid)
 {
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
+	const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo;
 	struct object_id compat_oid;
 	int fd, ret, err = 0, flush = 0;
 	unsigned char compressed[4096];
@@ -825,10 +824,10 @@ int odb_source_loose_write_stream(struct odb_source *source,
 	int hdrlen;
 
 	if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
-		prepare_loose_object_transaction(source->odb->transaction);
+		prepare_loose_object_transaction(loose->base.odb->transaction);
 
 	/* Since oid is not determined, save tmp file to odb path. */
-	strbuf_addf(&filename, "%s/", source->path);
+	strbuf_addf(&filename, "%s/", loose->base.path);
 	hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len);
 
 	/*
@@ -839,7 +838,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
 	 *  - Setup zlib stream for compression.
 	 *  - Start to feed header to zlib stream.
 	 */
-	fd = start_loose_object_common(source, &tmp_file, filename.buf, 0,
+	fd = start_loose_object_common(loose, &tmp_file, filename.buf, 0,
 				       &stream, compressed, sizeof(compressed),
 				       &c, &compat_c, hdr, hdrlen);
 	if (fd < 0) {
@@ -867,7 +866,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
 			if (in_stream->is_finished)
 				flush = 1;
 		}
-		ret = write_loose_object_common(source, &c, &compat_c, &stream, flush, in0, fd,
+		ret = write_loose_object_common(loose, &c, &compat_c, &stream, flush, in0, fd,
 						compressed, sizeof(compressed));
 		/*
 		 * Unlike write_loose_object(), we do not have the entire
@@ -890,16 +889,16 @@ int odb_source_loose_write_stream(struct odb_source *source,
 	 */
 	if (ret != Z_STREAM_END)
 		die(_("unable to stream deflate new object (%d)"), ret);
-	ret = end_loose_object_common(source, &c, &compat_c, &stream, oid, &compat_oid);
+	ret = end_loose_object_common(loose, &c, &compat_c, &stream, oid, &compat_oid);
 	if (ret != Z_OK)
 		die(_("deflateEnd on stream object failed (%d)"), ret);
-	close_loose_object(source, fd, tmp_file.buf);
+	close_loose_object(loose, fd, tmp_file.buf);
 
-	if (odb_freshen_object(source->odb, oid)) {
+	if (odb_freshen_object(loose->base.odb, oid)) {
 		unlink_or_warn(tmp_file.buf);
 		goto cleanup;
 	}
-	odb_loose_path(source, &filename, oid);
+	odb_loose_path(loose, &filename, oid);
 
 	/* We finally know the object path, and create the missing dir. */
 	dirlen = directory_size(filename.buf);
@@ -907,7 +906,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
 		struct strbuf dir = STRBUF_INIT;
 		strbuf_add(&dir, filename.buf, dirlen);
 
-		if (safe_create_dir_in_gitdir(source->odb->repo, dir.buf) &&
+		if (safe_create_dir_in_gitdir(loose->base.odb->repo, dir.buf) &&
 		    errno != EEXIST) {
 			err = error_errno(_("unable to create directory %s"), dir.buf);
 			strbuf_release(&dir);
@@ -916,10 +915,10 @@ int odb_source_loose_write_stream(struct odb_source *source,
 		strbuf_release(&dir);
 	}
 
-	err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
+	err = finalize_object_file_flags(loose->base.odb->repo, tmp_file.buf, filename.buf,
 					 FOF_SKIP_COLLISION_CHECK);
 	if (!err && compat)
-		err = repo_add_loose_object_map(files->loose, oid, &compat_oid);
+		err = repo_add_loose_object_map(loose, oid, &compat_oid);
 cleanup:
 	strbuf_release(&tmp_file);
 	strbuf_release(&filename);
@@ -957,7 +956,7 @@ int force_object_loose(struct odb_source *source,
 				     oid_to_hex(oid), compat->name);
 	}
 	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
-	ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0);
+	ret = write_loose_object(files->loose, oid, hdr, hdrlen, buf, len, mtime, 0);
 	if (!ret && compat)
 		ret = repo_add_loose_object_map(files->loose, oid, &compat_oid);
 	free(buf);
diff --git a/object-file.h b/object-file.h
index 2b32592de1..d30f1b10b2 100644
--- a/object-file.h
+++ b/object-file.h
@@ -23,7 +23,7 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa
 struct object_info;
 struct odb_source;
 
-int odb_source_loose_write_stream(struct odb_source *source,
+int odb_source_loose_write_stream(struct odb_source_loose *loose,
 				  struct odb_write_stream *stream, size_t len,
 				  struct object_id *oid);
 
@@ -31,7 +31,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
  * Put in `buf` the name of the file in the local object database that
  * would be used to store a loose object with the specified oid.
  */
-const char *odb_loose_path(struct odb_source *source,
+const char *odb_loose_path(struct odb_source_loose *source,
 			   struct strbuf *buf,
 			   const struct object_id *oid);
 
@@ -127,7 +127,7 @@ void write_object_file_prepare(const struct git_hash_algo *algo,
 			       const void *buf, unsigned long len,
 			       enum object_type type, struct object_id *oid,
 			       char *hdr, int *hdrlen);
-int write_loose_object(struct odb_source *source,
+int write_loose_object(struct odb_source_loose *loose,
 		       const struct object_id *oid, char *hdr,
 		       int hdrlen, const void *buf, unsigned long len,
 		       time_t mtime, unsigned flags);
diff --git a/odb/source-files.c b/odb/source-files.c
index 52ba04237a..2ba1def776 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -174,7 +174,8 @@ static int odb_source_files_write_object_stream(struct odb_source *source,
 						size_t len,
 						struct object_id *oid)
 {
-	return odb_source_loose_write_stream(source, stream, len, oid);
+	struct odb_source_files *files = odb_source_files_downcast(source);
+	return odb_source_loose_write_stream(files->loose, stream, len, oid);
 }
 
 static int odb_source_files_begin_transaction(struct odb_source *source,
diff --git a/odb/source-loose.c b/odb/source-loose.c
index c91018109e..da8a60dba1 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -220,7 +220,7 @@ static int odb_source_loose_read_object_info(struct odb_source *source,
 	if (flags & OBJECT_INFO_SECOND_READ)
 		return -1;
 
-	odb_loose_path(source, &buf, oid);
+	odb_loose_path(loose, &buf, oid);
 	return read_object_info_from_path(loose, buf.buf, oid, oi, flags);
 }
 
@@ -238,7 +238,7 @@ static int open_loose_object(struct odb_source_loose *loose,
 	static struct strbuf buf = STRBUF_INIT;
 	int fd;
 
-	*path = odb_loose_path(&loose->base, &buf, oid);
+	*path = odb_loose_path(loose, &buf, oid);
 	fd = git_open(*path);
 	if (fd >= 0)
 		return fd;
@@ -584,8 +584,9 @@ static int odb_source_loose_count_objects(struct odb_source *source,
 static int odb_source_loose_freshen_object(struct odb_source *source,
 					   const struct object_id *oid)
 {
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
 	static struct strbuf path = STRBUF_INIT;
-	odb_loose_path(source, &path, oid);
+	odb_loose_path(loose, &path, oid);
 	return !!check_and_freshen_file(path.buf, 1);
 }
 
@@ -624,7 +625,7 @@ static int odb_source_loose_write_object(struct odb_source *source,
 	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
 	if (odb_freshen_object(source->odb, oid))
 		return 0;
-	if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags))
+	if (write_loose_object(loose, oid, hdr, hdrlen, buf, len, 0, flags))
 		return -1;
 	if (compat)
 		return repo_add_loose_object_map(loose, oid, &compat_oid);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 14/18] odb/source-loose: wire up `write_object()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_write_object()` from "object-file.c" into
"odb/source-loose.c" and wire it up as the `write_object()` callback of
the loose source.

As in preceding commits, this requires us to expose a couple of generic
functions from "object-file.c" as they are used in both subsystems now.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 58 ++++++++----------------------------------------------
 object-file.h      | 14 +++++++------
 odb/source-files.c |  5 +++--
 odb/source-loose.c | 44 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 63 insertions(+), 58 deletions(-)

diff --git a/object-file.c b/object-file.c
index fe24f00d1b..7bb5b31bca 100644
--- a/object-file.c
+++ b/object-file.c
@@ -326,10 +326,10 @@ static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_c
 	git_hash_final_oid(oid, c);
 }
 
-static void write_object_file_prepare(const struct git_hash_algo *algo,
-				      const void *buf, unsigned long len,
-				      enum object_type type, struct object_id *oid,
-				      char *hdr, int *hdrlen)
+void write_object_file_prepare(const struct git_hash_algo *algo,
+			       const void *buf, unsigned long len,
+			       enum object_type type, struct object_id *oid,
+			       char *hdr, int *hdrlen)
 {
 	struct git_hash_ctx c;
 
@@ -746,10 +746,10 @@ static int end_loose_object_common(struct odb_source *source,
 	return Z_OK;
 }
 
-static int write_loose_object(struct odb_source *source,
-			      const struct object_id *oid, char *hdr,
-			      int hdrlen, const void *buf, unsigned long len,
-			      time_t mtime, unsigned flags)
+int write_loose_object(struct odb_source *source,
+		       const struct object_id *oid, char *hdr,
+		       int hdrlen, const void *buf, unsigned long len,
+		       time_t mtime, unsigned flags)
 {
 	int fd, ret;
 	unsigned char compressed[4096];
@@ -926,48 +926,6 @@ int odb_source_loose_write_stream(struct odb_source *source,
 	return err;
 }
 
-int odb_source_loose_write_object(struct odb_source *source,
-				  const void *buf, unsigned long len,
-				  enum object_type type, struct object_id *oid,
-				  struct object_id *compat_oid_in,
-				  enum odb_write_object_flags flags)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
-	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
-	struct object_id compat_oid;
-	char hdr[MAX_HEADER_LEN];
-	int hdrlen = sizeof(hdr);
-
-	/* Generate compat_oid */
-	if (compat) {
-		if (compat_oid_in)
-			oidcpy(&compat_oid, compat_oid_in);
-		else if (type == OBJ_BLOB)
-			hash_object_file(compat, buf, len, type, &compat_oid);
-		else {
-			struct strbuf converted = STRBUF_INIT;
-			convert_object_file(source->odb->repo, &converted, algo, compat,
-					    buf, len, type, 0);
-			hash_object_file(compat, converted.buf, converted.len,
-					 type, &compat_oid);
-			strbuf_release(&converted);
-		}
-	}
-
-	/* Normally if we have it in the pack then we do not bother writing
-	 * it out into .git/objects/??/?{38} file.
-	 */
-	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
-	if (odb_freshen_object(source->odb, oid))
-		return 0;
-	if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags))
-		return -1;
-	if (compat)
-		return repo_add_loose_object_map(files->loose, oid, &compat_oid);
-	return 0;
-}
-
 int force_object_loose(struct odb_source *source,
 		       const struct object_id *oid, time_t mtime)
 {
diff --git a/object-file.h b/object-file.h
index 1d90df9d98..2b32592de1 100644
--- a/object-file.h
+++ b/object-file.h
@@ -23,12 +23,6 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa
 struct object_info;
 struct odb_source;
 
-int odb_source_loose_write_object(struct odb_source *source,
-				  const void *buf, unsigned long len,
-				  enum object_type type, struct object_id *oid,
-				  struct object_id *compat_oid_in,
-				  enum odb_write_object_flags flags);
-
 int odb_source_loose_write_stream(struct odb_source *source,
 				  struct odb_write_stream *stream, size_t len,
 				  struct object_id *oid);
@@ -129,6 +123,14 @@ int finalize_object_file_flags(struct repository *repo,
 void hash_object_file(const struct git_hash_algo *algo, const void *buf,
 		      unsigned long len, enum object_type type,
 		      struct object_id *oid);
+void write_object_file_prepare(const struct git_hash_algo *algo,
+			       const void *buf, unsigned long len,
+			       enum object_type type, struct object_id *oid,
+			       char *hdr, int *hdrlen);
+int write_loose_object(struct odb_source *source,
+		       const struct object_id *oid, char *hdr,
+		       int hdrlen, const void *buf, unsigned long len,
+		       time_t mtime, unsigned flags);
 
 /* Helper to check and "touch" a file */
 int check_and_freshen_file(const char *fn, int freshen);
diff --git a/odb/source-files.c b/odb/source-files.c
index ef548e6fe6..52ba04237a 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -164,8 +164,9 @@ static int odb_source_files_write_object(struct odb_source *source,
 					 struct object_id *compat_oid,
 					 enum odb_write_object_flags flags)
 {
-	return odb_source_loose_write_object(source, buf, len, type,
-					     oid, compat_oid, flags);
+	struct odb_source_files *files = odb_source_files_downcast(source);
+	return odb_source_write_object(&files->loose->base, buf, len, type,
+				       oid, compat_oid, flags);
 }
 
 static int odb_source_files_write_object_stream(struct odb_source *source,
diff --git a/odb/source-loose.c b/odb/source-loose.c
index e519365d23..c91018109e 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -5,6 +5,7 @@
 #include "hex.h"
 #include "loose.h"
 #include "object-file.h"
+#include "object-file-convert.h"
 #include "odb.h"
 #include "odb/source-files.h"
 #include "odb/source-loose.h"
@@ -588,6 +589,48 @@ static int odb_source_loose_freshen_object(struct odb_source *source,
 	return !!check_and_freshen_file(path.buf, 1);
 }
 
+static int odb_source_loose_write_object(struct odb_source *source,
+					 const void *buf, unsigned long len,
+					 enum object_type type, struct object_id *oid,
+					 struct object_id *compat_oid_in,
+					 enum odb_write_object_flags flags)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
+	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
+	struct object_id compat_oid;
+	char hdr[MAX_HEADER_LEN];
+	int hdrlen = sizeof(hdr);
+
+	/* Generate compat_oid */
+	if (compat) {
+		if (compat_oid_in)
+			oidcpy(&compat_oid, compat_oid_in);
+		else if (type == OBJ_BLOB)
+			hash_object_file(compat, buf, len, type, &compat_oid);
+		else {
+			struct strbuf converted = STRBUF_INIT;
+			convert_object_file(source->odb->repo, &converted, algo, compat,
+					    buf, len, type, 0);
+			hash_object_file(compat, converted.buf, converted.len,
+					 type, &compat_oid);
+			strbuf_release(&converted);
+		}
+	}
+
+	/* Normally if we have it in the pack then we do not bother writing
+	 * it out into .git/objects/??/?{38} file.
+	 */
+	write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
+	if (odb_freshen_object(source->odb, oid))
+		return 0;
+	if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags))
+		return -1;
+	if (compat)
+		return repo_add_loose_object_map(loose, oid, &compat_oid);
+	return 0;
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -647,6 +690,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len;
 	loose->base.count_objects = odb_source_loose_count_objects;
 	loose->base.freshen_object = odb_source_loose_freshen_object;
+	loose->base.write_object = odb_source_loose_write_object;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 13/18] loose: refactor object map to operate on `struct odb_source_loose`
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

While the loose object map functions in "loose.c" accept a generic
`struct odb_source *`, they always expect this to be the "files"
backend. Furthermore, the subsystem doesn't even care about the "files"
backend, but only uses it as a stepping stone to get to the "loose"
backend.

This assumption is implicit and thus not immediately obvious. Refactor
the interfaces to instead operate on a `struct odb_source_loose`
instead, which eliminates the implicit dependency and unnecessary detour
via the "files" source.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 loose.c       | 45 ++++++++++++++++++++++-----------------------
 loose.h       |  4 ++--
 object-file.c |  9 ++++++---
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/loose.c b/loose.c
index f7a3dd1a72..0b626c1b85 100644
--- a/loose.c
+++ b/loose.c
@@ -46,38 +46,36 @@ static int insert_oid_pair(kh_oid_map_t *map, const struct object_id *key, const
 	return 1;
 }
 
-static int insert_loose_map(struct odb_source *source,
+static int insert_loose_map(struct odb_source_loose *loose,
 			    const struct object_id *oid,
 			    const struct object_id *compat_oid)
 {
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	struct loose_object_map *map = files->loose->map;
+	struct loose_object_map *map = loose->map;
 	int inserted = 0;
 
 	inserted |= insert_oid_pair(map->to_compat, oid, compat_oid);
 	inserted |= insert_oid_pair(map->to_storage, compat_oid, oid);
 	if (inserted)
-		oidtree_insert(files->loose->cache, compat_oid, NULL);
+		oidtree_insert(loose->cache, compat_oid, NULL);
 
 	return inserted;
 }
 
-static int load_one_loose_object_map(struct repository *repo, struct odb_source *source)
+static int load_one_loose_object_map(struct repository *repo, struct odb_source_loose *loose)
 {
-	struct odb_source_files *files = odb_source_files_downcast(source);
 	struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
 	FILE *fp;
 
-	if (!files->loose->map)
-		loose_object_map_init(&files->loose->map);
-	if (!files->loose->cache) {
-		ALLOC_ARRAY(files->loose->cache, 1);
-		oidtree_init(files->loose->cache);
+	if (!loose->map)
+		loose_object_map_init(&loose->map);
+	if (!loose->cache) {
+		ALLOC_ARRAY(loose->cache, 1);
+		oidtree_init(loose->cache);
 	}
 
-	insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree);
-	insert_loose_map(source, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob);
-	insert_loose_map(source, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid);
+	insert_loose_map(loose, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree);
+	insert_loose_map(loose, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob);
+	insert_loose_map(loose, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid);
 
 	repo_common_path_replace(repo, &path, "objects/loose-object-idx");
 	fp = fopen(path.buf, "rb");
@@ -97,7 +95,7 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source
 		    parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) ||
 		    p != buf.buf + buf.len)
 			goto err;
-		insert_loose_map(source, &oid, &compat_oid);
+		insert_loose_map(loose, &oid, &compat_oid);
 	}
 
 	strbuf_release(&buf);
@@ -119,7 +117,8 @@ int repo_read_loose_object_map(struct repository *repo)
 	odb_prepare_alternates(repo->objects);
 
 	for (source = repo->objects->sources; source; source = source->next) {
-		if (load_one_loose_object_map(repo, source) < 0) {
+		struct odb_source_files *files = odb_source_files_downcast(source);
+		if (load_one_loose_object_map(repo, files->loose) < 0) {
 			return -1;
 		}
 	}
@@ -171,7 +170,7 @@ int repo_write_loose_object_map(struct repository *repo)
 	return -1;
 }
 
-static int write_one_object(struct odb_source *source,
+static int write_one_object(struct odb_source_loose *loose,
 			    const struct object_id *oid,
 			    const struct object_id *compat_oid)
 {
@@ -180,7 +179,7 @@ static int write_one_object(struct odb_source *source,
 	struct stat st;
 	struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
 
-	strbuf_addf(&path, "%s/loose-object-idx", source->path);
+	strbuf_addf(&path, "%s/loose-object-idx", loose->base.path);
 	hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1);
 
 	fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666);
@@ -196,7 +195,7 @@ static int write_one_object(struct odb_source *source,
 		goto errout;
 	if (close(fd))
 		goto errout;
-	adjust_shared_perm(source->odb->repo, path.buf);
+	adjust_shared_perm(loose->base.odb->repo, path.buf);
 	rollback_lock_file(&lock);
 	strbuf_release(&buf);
 	strbuf_release(&path);
@@ -210,18 +209,18 @@ static int write_one_object(struct odb_source *source,
 	return -1;
 }
 
-int repo_add_loose_object_map(struct odb_source *source,
+int repo_add_loose_object_map(struct odb_source_loose *loose,
 			      const struct object_id *oid,
 			      const struct object_id *compat_oid)
 {
 	int inserted = 0;
 
-	if (!should_use_loose_object_map(source->odb->repo))
+	if (!should_use_loose_object_map(loose->base.odb->repo))
 		return 0;
 
-	inserted = insert_loose_map(source, oid, compat_oid);
+	inserted = insert_loose_map(loose, oid, compat_oid);
 	if (inserted)
-		return write_one_object(source, oid, compat_oid);
+		return write_one_object(loose, oid, compat_oid);
 	return 0;
 }
 
diff --git a/loose.h b/loose.h
index 6af1702973..6c9b3f4571 100644
--- a/loose.h
+++ b/loose.h
@@ -4,7 +4,7 @@
 #include "khash.h"
 
 struct repository;
-struct odb_source;
+struct odb_source_loose;
 
 struct loose_object_map {
 	kh_oid_map_t *to_compat;
@@ -17,7 +17,7 @@ int repo_loose_object_map_oid(struct repository *repo,
 			      const struct object_id *src,
 			      const struct git_hash_algo *dest_algo,
 			      struct object_id *dest);
-int repo_add_loose_object_map(struct odb_source *source,
+int repo_add_loose_object_map(struct odb_source_loose *loose,
 			      const struct object_id *oid,
 			      const struct object_id *compat_oid);
 int repo_read_loose_object_map(struct repository *repo);
diff --git a/object-file.c b/object-file.c
index 0689a4e67b..fe24f00d1b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -810,6 +810,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
 				  struct odb_write_stream *in_stream, size_t len,
 				  struct object_id *oid)
 {
+	struct odb_source_files *files = odb_source_files_downcast(source);
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	struct object_id compat_oid;
 	int fd, ret, err = 0, flush = 0;
@@ -918,7 +919,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
 	err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf,
 					 FOF_SKIP_COLLISION_CHECK);
 	if (!err && compat)
-		err = repo_add_loose_object_map(source, oid, &compat_oid);
+		err = repo_add_loose_object_map(files->loose, oid, &compat_oid);
 cleanup:
 	strbuf_release(&tmp_file);
 	strbuf_release(&filename);
@@ -931,6 +932,7 @@ int odb_source_loose_write_object(struct odb_source *source,
 				  struct object_id *compat_oid_in,
 				  enum odb_write_object_flags flags)
 {
+	struct odb_source_files *files = odb_source_files_downcast(source);
 	const struct git_hash_algo *algo = source->odb->repo->hash_algo;
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	struct object_id compat_oid;
@@ -962,13 +964,14 @@ int odb_source_loose_write_object(struct odb_source *source,
 	if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags))
 		return -1;
 	if (compat)
-		return repo_add_loose_object_map(source, oid, &compat_oid);
+		return repo_add_loose_object_map(files->loose, oid, &compat_oid);
 	return 0;
 }
 
 int force_object_loose(struct odb_source *source,
 		       const struct object_id *oid, time_t mtime)
 {
+	struct odb_source_files *files = odb_source_files_downcast(source);
 	const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo;
 	void *buf;
 	unsigned long len;
@@ -998,7 +1001,7 @@ int force_object_loose(struct odb_source *source,
 	hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
 	ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0);
 	if (!ret && compat)
-		ret = repo_add_loose_object_map(source, oid, &compat_oid);
+		ret = repo_add_loose_object_map(files->loose, oid, &compat_oid);
 	free(buf);
 
 	return ret;

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 12/18] odb/source-loose: wire up `freshen_object()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_freshen_object()` from "object-file.c" into
"odb/source-loose.c" and wire it up as the `freshen_object()` callback
of the loose source.

As part of the move, `check_and_freshen_source()` is inlined into the
callback function, as it has no other callers anymore.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 object-file.c      | 15 ---------------
 object-file.h      |  3 ---
 odb/source-files.c |  2 +-
 odb/source-loose.c |  9 +++++++++
 4 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/object-file.c b/object-file.c
index c83136cf70..0689a4e67b 100644
--- a/object-file.c
+++ b/object-file.c
@@ -87,15 +87,6 @@ int check_and_freshen_file(const char *fn, int freshen)
 	return 1;
 }
 
-static int check_and_freshen_source(struct odb_source *source,
-				    const struct object_id *oid,
-				    int freshen)
-{
-	static struct strbuf path = STRBUF_INIT;
-	odb_loose_path(source, &path, oid);
-	return check_and_freshen_file(path.buf, freshen);
-}
-
 int format_object_header(char *str, size_t size, enum object_type type,
 			 size_t objsize)
 {
@@ -815,12 +806,6 @@ static int write_loose_object(struct odb_source *source,
 					  FOF_SKIP_COLLISION_CHECK);
 }
 
-int odb_source_loose_freshen_object(struct odb_source *source,
-				    const struct object_id *oid)
-{
-	return !!check_and_freshen_source(source, oid, 1);
-}
-
 int odb_source_loose_write_stream(struct odb_source *source,
 				  struct odb_write_stream *in_stream, size_t len,
 				  struct object_id *oid)
diff --git a/object-file.h b/object-file.h
index 506ca6be40..1d90df9d98 100644
--- a/object-file.h
+++ b/object-file.h
@@ -23,9 +23,6 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa
 struct object_info;
 struct odb_source;
 
-int odb_source_loose_freshen_object(struct odb_source *source,
-				    const struct object_id *oid);
-
 int odb_source_loose_write_object(struct odb_source *source,
 				  const void *buf, unsigned long len,
 				  enum object_type type, struct object_id *oid,
diff --git a/odb/source-files.c b/odb/source-files.c
index d5454e170d..ef548e6fe6 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -152,7 +152,7 @@ static int odb_source_files_freshen_object(struct odb_source *source,
 {
 	struct odb_source_files *files = odb_source_files_downcast(source);
 	if (packfile_store_freshen_object(files->packed, oid) ||
-	    odb_source_loose_freshen_object(source, oid))
+	    odb_source_freshen_object(&files->loose->base, oid))
 		return 1;
 	return 0;
 }
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 27be066327..e519365d23 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -580,6 +580,14 @@ static int odb_source_loose_count_objects(struct odb_source *source,
 	return ret;
 }
 
+static int odb_source_loose_freshen_object(struct odb_source *source,
+					   const struct object_id *oid)
+{
+	static struct strbuf path = STRBUF_INIT;
+	odb_loose_path(source, &path, oid);
+	return !!check_and_freshen_file(path.buf, 1);
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -638,6 +646,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.for_each_object = odb_source_loose_for_each_object;
 	loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len;
 	loose->base.count_objects = odb_source_loose_count_objects;
+	loose->base.freshen_object = odb_source_loose_freshen_object;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 11/18] odb/source-loose: drop `odb_source_loose_has_object()`
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

The function `odb_source_loose_has_object()` checks whether a specific
object exists as a loose object on disk by using lstat(3p). This
interface is somewhat redundant, as we typically check for object
existence in a generic way via `odb_source_read_object_info()`.

In fact, these two calls are redundant in case the latter is called in a
specific way: when called without an object info request and without the
`OBJECT_INFO_QUICK` flag, then we will end up doing the same call to
lstat(3p) in `read_object_info_from_path()`.

Drop the function and adapt callers to instead use the generic
interface so that its calling conventions align with that of other
sources.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/pack-objects.c | 12 ++++++++----
 object-file.c          | 12 ++++--------
 object-file.h          |  8 --------
 3 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 480cc0bd8c..a6be3d659f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1750,9 +1750,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid,
 		 * skip the local object source.
 		 */
 		struct odb_source *source = the_repository->objects->sources->next;
-		for (; source; source = source->next)
-			if (odb_source_loose_has_object(source, oid))
+		for (; source; source = source->next) {
+			struct odb_source_files *files = odb_source_files_downcast(source);
+			if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0))
 				return 0;
+		}
 	}
 
 	/*
@@ -4135,9 +4137,11 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 			struct odb_source *source = the_repository->objects->sources;
 			int found = 0;
 
-			for (; !found && source; source = source->next)
-				if (odb_source_loose_has_object(source, oid))
+			for (; !found && source; source = source->next) {
+				struct odb_source_files *files = odb_source_files_downcast(source);
+				if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0))
 					found = 1;
+			}
 
 			/*
 			 * If a traversed tree has a missing blob then we want
diff --git a/object-file.c b/object-file.c
index 9b2044de37..c83136cf70 100644
--- a/object-file.c
+++ b/object-file.c
@@ -96,12 +96,6 @@ static int check_and_freshen_source(struct odb_source *source,
 	return check_and_freshen_file(path.buf, freshen);
 }
 
-int odb_source_loose_has_object(struct odb_source *source,
-				const struct object_id *oid)
-{
-	return check_and_freshen_source(source, oid, 0);
-}
-
 int format_object_header(char *str, size_t size, enum object_type type,
 			 size_t objsize)
 {
@@ -1000,9 +994,11 @@ int force_object_loose(struct odb_source *source,
 	int hdrlen;
 	int ret;
 
-	for (struct odb_source *s = source->odb->sources; s; s = s->next)
-		if (odb_source_loose_has_object(s, oid))
+	for (struct odb_source *s = source->odb->sources; s; s = s->next) {
+		struct odb_source_files *files = odb_source_files_downcast(s);
+		if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0))
 			return 0;
+	}
 
 	oi.typep = &type;
 	oi.sizep = &len;
diff --git a/object-file.h b/object-file.h
index bc72d89f54..506ca6be40 100644
--- a/object-file.h
+++ b/object-file.h
@@ -23,14 +23,6 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa
 struct object_info;
 struct odb_source;
 
-/*
- * Return true iff an object database source has a loose object
- * with the specified name.  This function does not respect replace
- * references.
- */
-int odb_source_loose_has_object(struct odb_source *source,
-				const struct object_id *oid);
-
 int odb_source_loose_freshen_object(struct odb_source *source,
 				    const struct object_id *oid);
 

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related

* [PATCH v2 10/18] odb/source-loose: wire up `count_objects()` callback
From: Patrick Steinhardt @ 2026-06-01  8:20 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano
In-Reply-To: <20260601-b4-pks-odb-source-loose-v2-0-90ff159430af@pks.im>

Move `odb_source_loose_count_objects()` and its associated helpers from
"object-file.c" into "odb/source-loose.c" and wire it up as the
`count_objects()` callback of the loose source.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/gc.c       |  6 +++---
 object-file.c      | 60 -----------------------------------------------------
 object-file.h      | 14 -------------
 odb/source-files.c |  2 +-
 odb/source-loose.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 65 insertions(+), 78 deletions(-)

diff --git a/builtin/gc.c b/builtin/gc.c
index 84a66d3240..c26c93ee0f 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -466,6 +466,7 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED)
 
 static int too_many_loose_objects(int limit)
 {
+	struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources);
 	/*
 	 * This is weird, but stems from legacy behaviour: the GC auto
 	 * threshold was always essentially interpreted as if it was rounded up
@@ -474,9 +475,8 @@ static int too_many_loose_objects(int limit)
 	int auto_threshold = DIV_ROUND_UP(limit, 256) * 256;
 	unsigned long loose_count;
 
-	if (odb_source_loose_count_objects(the_repository->objects->sources,
-					   ODB_COUNT_OBJECTS_APPROXIMATE,
-					   &loose_count) < 0)
+	if (odb_source_count_objects(&files->loose->base, ODB_COUNT_OBJECTS_APPROXIMATE,
+				     &loose_count) < 0)
 		return 0;
 
 	return loose_count > auto_threshold;
diff --git a/object-file.c b/object-file.c
index 11957aa44f..9b2044de37 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1602,66 +1602,6 @@ int for_each_loose_file_in_source(struct odb_source *source,
 	return r;
 }
 
-static int count_loose_object(const struct object_id *oid UNUSED,
-			      struct object_info *oi UNUSED,
-			      void *payload)
-{
-	unsigned long *count = payload;
-	(*count)++;
-	return 0;
-}
-
-int odb_source_loose_count_objects(struct odb_source *source,
-				   enum odb_count_objects_flags flags,
-				   unsigned long *out)
-{
-	struct odb_source_files *files = odb_source_files_downcast(source);
-	const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
-	char *path = NULL;
-	DIR *dir = NULL;
-	int ret;
-
-	if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) {
-		unsigned long count = 0;
-		struct dirent *ent;
-
-		path = xstrfmt("%s/17", source->path);
-
-		dir = opendir(path);
-		if (!dir) {
-			if (errno == ENOENT) {
-				*out = 0;
-				ret = 0;
-				goto out;
-			}
-
-			ret = error_errno("cannot open object shard '%s'", path);
-			goto out;
-		}
-
-		while ((ent = readdir(dir)) != NULL) {
-			if (strspn(ent->d_name, "0123456789abcdef") != hexsz ||
-			    ent->d_name[hexsz] != '\0')
-				continue;
-			count++;
-		}
-
-		*out = count * 256;
-		ret = 0;
-	} else {
-		struct odb_for_each_object_options opts = { 0 };
-		*out = 0;
-		ret = odb_source_for_each_object(&files->loose->base, NULL, count_loose_object,
-						 out, &opts);
-	}
-
-out:
-	if (dir)
-		closedir(dir);
-	free(path);
-	return ret;
-}
-
 static int check_stream_oid(git_zstream *stream,
 			    const char *hdr,
 			    unsigned long size,
diff --git a/object-file.h b/object-file.h
index 96760db0e1..bc72d89f54 100644
--- a/object-file.h
+++ b/object-file.h
@@ -96,20 +96,6 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr,
 				each_loose_subdir_fn subdir_cb,
 				void *data);
 
-/*
- * Count the number of loose objects in this source.
- *
- * The object count is approximated by opening a single sharding directory for
- * loose objects and scanning its contents. The result is then extrapolated by
- * 256. This should generally work as a reasonable estimate given that the
- * object hash is supposed to be indistinguishable from random.
- *
- * Returns 0 on success, a negative error code otherwise.
- */
-int odb_source_loose_count_objects(struct odb_source *source,
-				   enum odb_count_objects_flags flags,
-				   unsigned long *out);
-
 /**
  * format_object_header() is a thin wrapper around s xsnprintf() that
  * writes the initial "<type> <obj-len>" part of the loose object
diff --git a/odb/source-files.c b/odb/source-files.c
index 4a54b10e4a..d5454e170d 100644
--- a/odb/source-files.c
+++ b/odb/source-files.c
@@ -109,7 +109,7 @@ static int odb_source_files_count_objects(struct odb_source *source,
 	if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) {
 		unsigned long loose_count;
 
-		ret = odb_source_loose_count_objects(source, flags, &loose_count);
+		ret = odb_source_count_objects(&files->loose->base, flags, &loose_count);
 		if (ret < 0)
 			goto out;
 
diff --git a/odb/source-loose.c b/odb/source-loose.c
index 4b8d10bc87..27be066327 100644
--- a/odb/source-loose.c
+++ b/odb/source-loose.c
@@ -520,6 +520,66 @@ static int odb_source_loose_find_abbrev_len(struct odb_source *source,
 	return ret;
 }
 
+static int count_loose_object(const struct object_id *oid UNUSED,
+			      struct object_info *oi UNUSED,
+			      void *payload)
+{
+	unsigned long *count = payload;
+	(*count)++;
+	return 0;
+}
+
+static int odb_source_loose_count_objects(struct odb_source *source,
+					  enum odb_count_objects_flags flags,
+					  unsigned long *out)
+{
+	struct odb_source_loose *loose = odb_source_loose_downcast(source);
+	const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2;
+	char *path = NULL;
+	DIR *dir = NULL;
+	int ret;
+
+	if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) {
+		unsigned long count = 0;
+		struct dirent *ent;
+
+		path = xstrfmt("%s/17", source->path);
+
+		dir = opendir(path);
+		if (!dir) {
+			if (errno == ENOENT) {
+				*out = 0;
+				ret = 0;
+				goto out;
+			}
+
+			ret = error_errno("cannot open object shard '%s'", path);
+			goto out;
+		}
+
+		while ((ent = readdir(dir)) != NULL) {
+			if (strspn(ent->d_name, "0123456789abcdef") != hexsz ||
+			    ent->d_name[hexsz] != '\0')
+				continue;
+			count++;
+		}
+
+		*out = count * 256;
+		ret = 0;
+	} else {
+		struct odb_for_each_object_options opts = { 0 };
+		*out = 0;
+		ret = odb_source_for_each_object(&loose->base, NULL, count_loose_object,
+						 out, &opts);
+	}
+
+out:
+	if (dir)
+		closedir(dir);
+	free(path);
+	return ret;
+}
+
 static void odb_source_loose_clear_cache(struct odb_source_loose *loose)
 {
 	oidtree_clear(loose->cache);
@@ -577,6 +637,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files)
 	loose->base.read_object_stream = odb_source_loose_read_object_stream;
 	loose->base.for_each_object = odb_source_loose_for_each_object;
 	loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len;
+	loose->base.count_objects = odb_source_loose_count_objects;
 
 	if (!is_absolute_path(loose->base.path))
 		chdir_notify_register(NULL, odb_source_loose_reparent, loose);

-- 
2.54.0.926.g75ba10bac6.dirty


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox