git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Patrick Steinhardt <ps@pks.im>,  Taylor Blau <me@ttaylorr.com>
Cc: git@vger.kernel.org,  Jeff King <peff@peff.net>
Subject: Re: [PATCH v2 0/3] builtin/repack: avoid rewriting up-to-date MIDX
Date: Thu, 11 Dec 2025 17:46:13 +0900	[thread overview]
Message-ID: <xmqqsedhe78q.fsf@gitster.g> (raw)
In-Reply-To: <20251210-pks-skip-noop-rewrite-v2-0-f813a9e44f28@pks.im> (Patrick Steinhardt's message of "Wed, 10 Dec 2025 13:52:17 +0100")

This and Taylor's incremental part 3.2 have a slight conflict in
that this topic factors away the logic to compute if we need
recomputing MIDX while the other one tweaks with yet another flag.

My tentative resolution in 'seen' looks like the attached.  Sanity
checking is very much appreciated.

Thanks.

diff --cc midx-write.c
index ce459b02c3,f2dbacef4c..66c125ccb0
--- a/midx-write.c
+++ b/midx-write.c
@@@ -1014,73 -1131,30 +1131,89 @@@ static void clear_midx_files(struct odb
  	strbuf_release(&buf);
  }
  
 +static bool midx_needs_update(struct multi_pack_index *midx, struct write_midx_context *ctx)
 +{
 +	struct strset packs = STRSET_INIT;
 +	struct strbuf buf = STRBUF_INIT;
 +	bool needed = true;
 +
 +	/*
 +	 * Ignore incremental updates for now. The assumption is that any
 +	 * incremental update would be either empty (in which case we will bail
 +	 * out later) or it would actually cover at least one new pack.
 +	 */
- 	if (ctx->incremental)
++	if (ctx->incremental || ctx->compact)
 +		goto out;
 +
 +	/*
 +	 * Otherwise, we need to verify that the packs covered by the existing
 +	 * MIDX match the packs that we already have. The logic to do so is way
 +	 * more complicated than it has any right to be. This is because:
 +	 *
 +	 *   - We cannot assume any ordering.
 +	 *
 +	 *   - The MIDX packs may not be loaded at all, and loading them would
 +	 *     be wasteful. So we need to use the pack names tracked by the
 +	 *     MIDX itself.
 +	 *
 +	 *   - The MIDX pack names are tracking the ".idx" files, whereas the
 +	 *     packs themselves are tracking the ".pack" files. So we need to
 +	 *     strip suffixes.
 +	 */
 +	if (ctx->nr != midx->num_packs + midx->num_packs_in_base)
 +		goto out;
 +
 +	for (uint32_t i = 0; i < ctx->nr; i++) {
 +		strbuf_reset(&buf);
 +		strbuf_addstr(&buf, pack_basename(ctx->info[i].p));
 +		strbuf_strip_suffix(&buf, ".pack");
 +
 +		if (!strset_add(&packs, buf.buf))
 +			BUG("same pack added twice?");
 +	}
 +
 +	for (uint32_t i = 0; i < ctx->nr; i++) {
 +		strbuf_reset(&buf);
 +		strbuf_addstr(&buf, midx->pack_names[i]);
 +		strbuf_strip_suffix(&buf, ".idx");
 +
 +		if (!strset_contains(&packs, buf.buf))
 +			goto out;
 +		strset_remove(&packs, buf.buf);
 +	}
 +
 +	needed = false;
 +
 +out:
 +	strbuf_release(&buf);
 +	strset_clear(&packs);
 +	return needed;
 +}
 +
- static int write_midx_internal(struct odb_source *source,
- 			       struct string_list *packs_to_include,
- 			       struct string_list *packs_to_drop,
- 			       const char *preferred_pack_name,
- 			       const char *refs_snapshot,
- 			       unsigned flags)
+ static int midx_hashcmp(const struct multi_pack_index *a,
+ 			const struct multi_pack_index *b,
+ 			const struct git_hash_algo *algop)
  {
- 	struct repository *r = source->odb->repo;
+ 	return hashcmp(get_midx_hash(a), get_midx_hash(b), algop);
+ }
+ 
+ struct write_midx_opts {
+ 	struct odb_source *source;
+ 
+ 	struct string_list *packs_to_include;
+ 	struct string_list *packs_to_drop;
+ 
+ 	struct multi_pack_index *compact_from;
+ 	struct multi_pack_index *compact_to;
+ 
+ 	const char *preferred_pack_name;
+ 	const char *refs_snapshot;
+ 	unsigned flags;
+ };
+ 
+ static int write_midx_internal(struct write_midx_opts *opts)
+ {
+ 	struct repository *r = opts->source->odb->repo;
  	struct strbuf midx_name = STRBUF_INIT;
  	unsigned char midx_hash[GIT_MAX_RAWSZ];
  	uint32_t start_pack;
@@@ -1166,44 -1257,43 +1317,54 @@@
  	else
  		ctx.progress = NULL;
  
- 	ctx.to_include = packs_to_include;
+ 	if (ctx.compact) {
+ 		int bitmap_order = 0;
+ 		if (opts->preferred_pack_name)
+ 			bitmap_order |= 1;
+ 		else if (opts->flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
+ 			bitmap_order |= 1;
  
- 	for_each_file_in_pack_dir(source->path, add_pack_to_midx, &ctx);
+ 		fill_packs_from_midx_range(&ctx, bitmap_order);
+ 	} else {
+ 		ctx.to_include = opts->packs_to_include;
+ 		for_each_file_in_pack_dir(opts->source->path, add_pack_to_midx, &ctx);
+ 	}
  	stop_progress(&ctx.progress);
  
- 	if (!packs_to_drop) {
 -	if ((ctx.m && ctx.nr == ctx.m->num_packs + ctx.m->num_packs_in_base) &&
 -	    !ctx.incremental &&
 -	    !ctx.compact &&
 -	    !(opts->packs_to_include || opts->packs_to_drop)) {
 -		struct bitmap_index *bitmap_git;
 -		int bitmap_exists;
 -		int want_bitmap = opts->flags & MIDX_WRITE_BITMAP;
 -
 -		bitmap_git = prepare_midx_bitmap_git(ctx.m);
 -		bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
 -		free_bitmap_index(bitmap_git);
 -
 -		if (bitmap_exists || !want_bitmap) {
 -			/*
 -			 * The correct MIDX already exists, and so does a
 -			 * corresponding bitmap (or one wasn't requested).
 -			 */
 -			if (!want_bitmap)
 -				clear_midx_files_ext(opts->source, "bitmap",
 -						     NULL);
 -			result = 0;
 -			goto cleanup;
++	if (!opts->packs_to_drop) {
 +		/*
 +		 * If there is no MIDX then either it doesn't exist, or we're
 +		 * doing a geometric repack. Try to load it from the source to
 +		 * tell these two cases apart.
 +		 */
 +		struct multi_pack_index *midx = ctx.m;
 +		if (!midx)
 +			midx = midx_to_free = load_multi_pack_index(ctx.source);
 +
 +		if (midx && !midx_needs_update(midx, &ctx)) {
 +			struct bitmap_index *bitmap_git;
 +			int bitmap_exists;
- 			int want_bitmap = flags & MIDX_WRITE_BITMAP;
++			int want_bitmap = opts->flags & MIDX_WRITE_BITMAP;
 +
 +			bitmap_git = prepare_midx_bitmap_git(midx);
 +			bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git);
 +			free_bitmap_index(bitmap_git);
 +
 +			if (bitmap_exists || !want_bitmap) {
 +				/*
 +				 * The correct MIDX already exists, and so does a
 +				 * corresponding bitmap (or one wasn't requested).
 +				 */
 +				if (!want_bitmap)
- 					clear_midx_files_ext(source, "bitmap", NULL);
++					clear_midx_files_ext(opts->source,
++							     "bitmap", NULL);
 +				result = 0;
 +				goto cleanup;
 +			}
  		}
 +
 +		close_midx(midx_to_free);
 +		midx_to_free = NULL;
  	}
  
  	if (ctx.incremental && !ctx.nr) {

  parent reply	other threads:[~2025-12-11  8:46 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-12-08 18:27 [PATCH 0/2] builtin/repack: avoid rewriting up-to-date MIDX Patrick Steinhardt
2025-12-08 18:27 ` [PATCH 1/2] midx: fix `BUG()` when getting preferred pack without a reverse index Patrick Steinhardt
2025-12-10  0:22   ` Taylor Blau
2025-12-10  9:40     ` Patrick Steinhardt
2025-12-18 21:13       ` Taylor Blau
2025-12-08 18:27 ` [PATCH 2/2] builtin/repack: don't regenerate MIDX unless needed Patrick Steinhardt
2025-12-10  2:48   ` Taylor Blau
2025-12-10  9:40     ` Patrick Steinhardt
2025-12-10 12:52 ` [PATCH v2 0/3] builtin/repack: avoid rewriting up-to-date MIDX Patrick Steinhardt
2025-12-10 12:52   ` [PATCH v2 1/3] midx: fix `BUG()` when getting preferred pack without a reverse index Patrick Steinhardt
2025-12-10 12:52   ` [PATCH v2 2/3] midx-write: extract function to test whether MIDX needs updating Patrick Steinhardt
2025-12-10 12:52   ` [PATCH v2 3/3] midx-write: skip rewriting MIDX with `--stdin-packs` unless needed Patrick Steinhardt
2025-12-11  8:46   ` Junio C Hamano [this message]
2025-12-12  7:33     ` [PATCH v2 0/3] builtin/repack: avoid rewriting up-to-date MIDX Patrick Steinhardt
2025-12-18 21:18       ` Taylor Blau
2025-12-19  6:25         ` Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqsedhe78q.fsf@gitster.g \
    --to=gitster@pobox.com \
    --cc=git@vger.kernel.org \
    --cc=me@ttaylorr.com \
    --cc=peff@peff.net \
    --cc=ps@pks.im \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).